Compare commits

..

12 Commits

Author SHA1 Message Date
nityanandagohain
2593c5eb91 fix: linting issues 2026-04-13 15:44:43 +05:30
Nityananda Gohain
b6b2d36baa Merge branch 'main' into issue_4203 2026-04-10 17:15:08 +05:30
nityanandagohain
a444a039f9 Merge remote-tracking branch 'origin/issue_4203' into issue_4203 2026-04-10 17:13:22 +05:30
nityanandagohain
bfb050ec17 fix: add changes 2026-04-10 16:57:50 +05:30
nityanandagohain
ff3e87f70c Merge remote-tracking branch 'origin/main' into issue_4203 2026-04-09 21:29:11 +05:30
Nityananda Gohain
9ac02ebe00 Merge branch 'main' into issue_4203 2026-03-25 15:50:04 +05:30
nityanandagohain
fbdd0bebbc Merge remote-tracking branch 'origin/main' into issue_4203 2026-03-25 15:21:52 +05:30
nityanandagohain
b2245b48fe fix: retain existing behaviour 2026-03-23 11:03:34 +05:30
Nityananda Gohain
87e654fc73 chore: add comment
Co-authored-by: Tushar Vats <tushar@signoz.io>
2026-03-18 16:54:09 +05:30
nityanandagohain
0ee31ce440 chore: fix tests 2026-03-17 18:16:51 +05:30
nityanandagohain
63e681b87b chore: add integration tests 2026-03-17 15:38:00 +05:30
nityanandagohain
28375c8c1e chore: send all data for trace list api 2026-03-13 19:31:59 +05:30
21 changed files with 712 additions and 334 deletions

View File

@@ -14,7 +14,7 @@ import (
sdkmetric "go.opentelemetry.io/otel/metric"
sdkmetricnoop "go.opentelemetry.io/otel/metric/noop"
sdkresource "go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
semconv "go.opentelemetry.io/otel/semconv/v1.39.0"
sdktrace "go.opentelemetry.io/otel/trace"
)

View File

@@ -419,6 +419,7 @@ func readAsRaw(rows driver.Rows, queryName string) (*qbtypes.RawData, error) {
rr.Data[name] = val
}
mergeSpanAttributeColumns(rr.Data)
outRows = append(outRows, &rr)
}
if err := rows.Err(); err != nil {
@@ -431,6 +432,48 @@ func readAsRaw(rows driver.Rows, queryName string) (*qbtypes.RawData, error) {
}, nil
}
// mergeSpanAttributeColumns merges the typed ClickHouse span attribute columns
// (attributes_string, attributes_number, attributes_bool, resources_string) into
// unified "attributes" and "resource_attributes" keys, removing the raw columns.
// It is a no-op if none of the raw columns are present.
func mergeSpanAttributeColumns(data map[string]any) {
attrStr, hasStr := data["attributes_string"]
attrNum, hasNum := data["attributes_number"]
attrBool, hasBool := data["attributes_bool"]
// todo(nitya): move to resource json
resStr, hasRes := data["resources_string"]
if !hasStr && !hasNum && !hasBool && !hasRes {
return
}
attributes := make(map[string]any)
if m, ok := attrStr.(map[string]string); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrNum.(map[string]float64); ok {
for k, v := range m {
attributes[k] = v
}
}
if m, ok := attrBool.(map[string]bool); ok {
for k, v := range m {
attributes[k] = v
}
}
delete(data, "attributes_string")
delete(data, "attributes_number")
delete(data, "attributes_bool")
data["attributes"] = attributes
if m, ok := resStr.(map[string]string); ok {
data["resource"] = m
}
delete(data, "resources_string")
}
// numericAsFloat converts numeric types to float64 efficiently.
func numericAsFloat(v any) float64 {
switch x := v.(type) {

View File

@@ -207,16 +207,23 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
indexes := []telemetrytypes.JSONDataTypeIndex{}
fieldContextsSeen := map[telemetrytypes.FieldContext]bool{}
dataTypesSeen := map[telemetrytypes.FieldDataType]bool{}
jsonTypesSeen := map[string]*telemetrytypes.JSONDataType{}
for _, matchingKey := range matchingKeys {
materialized = materialized && matchingKey.Materialized
fieldContextsSeen[matchingKey.FieldContext] = true
dataTypesSeen[matchingKey.FieldDataType] = true
if matchingKey.JSONDataType != nil {
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
}
indexes = append(indexes, matchingKey.Indexes...)
}
for _, matchingKey := range contextPrefixedMatchingKeys {
materialized = materialized && matchingKey.Materialized
fieldContextsSeen[matchingKey.FieldContext] = true
dataTypesSeen[matchingKey.FieldDataType] = true
if matchingKey.JSONDataType != nil {
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
}
indexes = append(indexes, matchingKey.Indexes...)
}
key.Materialized = materialized
@@ -241,6 +248,15 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
break
}
}
if len(jsonTypesSeen) == 1 && key.JSONDataType == nil {
// all matching keys have same JSON data type, use it
for _, jt := range jsonTypesSeen {
actions = append(actions, fmt.Sprintf("Adjusting key %s to have JSON data type %s", key, jt.StringValue()))
key.JSONDataType = jt
break
}
}
}
return actions

View File

@@ -276,12 +276,11 @@ func (m *fieldMapper) FieldFor(ctx context.Context, tsStart, tsEnd uint64, key *
continue
}
if key.FieldDataType == telemetrytypes.FieldDataTypeUnspecified {
if key.JSONDataType == nil {
return "", qbtypes.ErrColumnNotFound
}
jdt := key.GetJSONDataType()
if key.KeyNameContainsArray() && !jdt.IsArray {
if key.KeyNameContainsArray() && !key.JSONDataType.IsArray {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "FieldFor not supported for nested fields; only supported for flat paths (e.g. body.status.detail) and paths of Array type: %s(%s)", key.Name, key.FieldDataType)
}

View File

@@ -220,7 +220,7 @@ func TestJSONStmtBuilder_PrimitivePaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((LOWER(toString(dynamicElement(body_v2.`user.age`, 'Int64'))) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')) OR ((LOWER(dynamicElement(body_v2.`user.age`, 'String')) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%25%", "%25%", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64 name=user.age,context=body,datatype=string]."},
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64,jsondatatype=Int64 name=user.age,context=body,datatype=string,jsondatatype=String]."},
},
},
{
@@ -414,7 +414,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -441,7 +441,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%true%", true, "%true%", true, "%true%", true, "%true%", true, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -455,7 +455,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -549,7 +549,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "((NOT arrayExists(`body_v2.education`-> toFloat64OrNull(dynamicElement(`body_v2.education`.`type`, 'String')) = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> dynamicElement(`body_v2.education`.`type`, 'Int64') = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
Args: []any{uint64(1747945619), uint64(1747983448), int64(10001), int64(10001), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string name=education[].type,context=body,datatype=int64]."},
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string,jsondatatype=String name=education[].type,context=body,datatype=int64,jsondatatype=Int64]."},
},
},
{
@@ -576,7 +576,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -585,7 +585,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "passed", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -594,7 +594,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -603,7 +603,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toFloat64(x) IN (?, ?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) IN (?, ?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.99, "1.65", "1.99", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -612,7 +612,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "((NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", float64(1.65), "%1.65%", float64(1.65), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -622,7 +622,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
WhereClause: "(has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?) OR has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?))",
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic].",
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)].",
},
},
},
@@ -702,7 +702,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')) OR ((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(x) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))')) OR arrayExists(x -> toFloat64OrNull(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%4%", float64(4), "%4%", float64(4), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64 name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string]."},
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64,jsondatatype=Array(Nullable(Int64)) name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string,jsondatatype=Array(Nullable(String))]."},
},
},
{
@@ -946,16 +946,16 @@ func buildTestTelemetryMetadataStore(t *testing.T, addIndexes bool) *telemetryty
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.SetStaticFields(IntrinsicFields)
types, _ := telemetrytypes.TestJSONTypeSet()
for path, fieldDataTypes := range types {
for _, fdt := range fieldDataTypes {
for path, jsonTypes := range types {
for _, jsonType := range jsonTypes {
key := &telemetrytypes.TelemetryFieldKey{
Name: path,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
FieldDataType: fdt,
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[jsonType],
JSONDataType: &jsonType,
}
if addIndexes {
jsonType := telemetrytypes.MappingFieldDataTypeToJSONDataType[fdt]
idx := slices.IndexFunc(telemetrytypes.TestIndexedPaths, func(entry telemetrytypes.TestIndexedPathEntry) bool {
return entry.Path == path && entry.Type == jsonType
})

View File

@@ -875,6 +875,7 @@ func TestAdjustKey(t *testing.T) {
require.Equal(t, c.expectedKey.FieldContext, key.FieldContext, "field context should match")
require.Equal(t, c.expectedKey.FieldDataType, key.FieldDataType, "field data type should match")
require.Equal(t, c.expectedKey.Materialized, key.Materialized, "materialized should match")
require.Equal(t, c.expectedKey.JSONDataType, key.JSONDataType, "json data type should match")
require.Equal(t, c.expectedKey.Indexes, key.Indexes, "json exists should match")
})
}

View File

@@ -21,72 +21,133 @@ import (
)
var (
defaultPathLimit = 100 // Default limit to prevent full table scans
CodeUnknownJSONDataType = errors.MustNewCode("unknown_json_data_type")
CodeFailLoadPromotedPaths = errors.MustNewCode("fail_load_promoted_paths")
CodeFailCheckPathPromoted = errors.MustNewCode("fail_check_path_promoted")
CodeFailIterateBodyJSONKeys = errors.MustNewCode("fail_iterate_body_json_keys")
CodeFailExtractBodyJSONKeys = errors.MustNewCode("fail_extract_body_json_keys")
CodeFailLoadLogsJSONIndexes = errors.MustNewCode("fail_load_logs_json_indexes")
CodeFailListJSONValues = errors.MustNewCode("fail_list_json_values")
CodeFailScanJSONValue = errors.MustNewCode("fail_scan_json_value")
CodeFailScanVariant = errors.MustNewCode("fail_scan_variant")
CodeFailBuildJSONPathsQuery = errors.MustNewCode("fail_build_json_paths_query")
CodeNoPathsToQueryIndexes = errors.MustNewCode("no_paths_to_query_indexes_provided")
CodeFailedToPrepareBatch = errors.MustNewCode("failed_to_prepare_batch_promoted_paths")
CodeFailedToSendBatch = errors.MustNewCode("failed_to_send_batch_promoted_paths")
CodeFailedToAppendPath = errors.MustNewCode("failed_to_append_path_promoted_paths")
)
// enrichBodyKeys enriches body-context keys with promoted path info, indexes,
// and JSON access plans.
// parentTypeCache contains parent array types (ArrayJSON/ArrayDynamic) pre-fetched in the main UNION query.
func (t *telemetryMetaStore) enrichBodyKeys(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey, parentTypeCache map[string][]telemetrytypes.FieldDataType) error {
if len(keys) == 0 {
return nil
}
// fetchBodyJSONPaths extracts body JSON paths from the path_types table
// This function can be used by both JSONQueryBuilder and metadata extraction
// uniquePathLimit: 0 for no limit, >0 for maximum number of unique paths to return
// - For startup load: set to 10000 to get top 10k unique paths
// - For lookup: set to 0 (no limit needed for single path)
// - For metadata API: set to desired pagination limit
//
// searchOperator: LIKE for pattern matching, EQUAL for exact match.
func (t *telemetryMetaStore) fetchBodyJSONPaths(ctx context.Context,
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, []string, bool, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
instrumentationtypes.TelemetrySignal: telemetrytypes.SignalLogs.StringValue(),
instrumentationtypes.CodeNamespace: "metadata",
instrumentationtypes.CodeFunctionName: "fetchBodyJSONPaths",
})
var filteredKeys []*telemetrytypes.TelemetryFieldKey
for _, key := range keys {
if key.FieldContext == telemetrytypes.FieldContextBody {
filteredKeys = append(filteredKeys, key)
query, args, limit := buildGetBodyJSONPathsQuery(fieldKeySelectors)
rows, err := t.telemetrystore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to extract body JSON keys")
}
defer rows.Close()
fieldKeys := []*telemetrytypes.TelemetryFieldKey{}
paths := []string{}
rowCount := 0
for rows.Next() {
var path string
var typesArray []string // ClickHouse returns array as []string
var lastSeen uint64
err = rows.Scan(&path, &typesArray, &lastSeen)
if err != nil {
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to scan body JSON key row")
}
for _, typ := range typesArray {
mapping, found := telemetrytypes.MappingStringToJSONDataType[typ]
if !found {
t.logger.ErrorContext(ctx, "failed to map type string to JSON data type", slog.String("type", typ), slog.String("path", path))
continue
}
fieldKeys = append(fieldKeys, &telemetrytypes.TelemetryFieldKey{
Name: path,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[mapping],
JSONDataType: &mapping,
})
}
paths = append(paths, path)
rowCount++
}
if rows.Err() != nil {
return nil, nil, false, errors.WrapInternalf(rows.Err(), CodeFailIterateBodyJSONKeys, "error iterating body JSON keys")
}
// collect paths for batch queries
paths := make([]string, 0, len(filteredKeys))
for _, key := range filteredKeys {
paths = append(paths, key.Name)
}
// fetch promoted paths
promoted, err := t.GetPromotedPaths(ctx, paths...)
if err != nil {
return err
}
// fetch JSON path indexes
indexes, err := t.getJSONPathIndexes(ctx, paths...)
if err != nil {
return err
}
// apply promoted/index metadata to keys
for _, key := range filteredKeys {
promotedKey := strings.Split(key.Name, telemetrytypes.ArraySep)[0]
key.Materialized = promoted[promotedKey]
key.Indexes = indexes[key.Name]
}
// build JSON access plans using the pre-fetched parent type cache
return t.buildJSONPlans(ctx, filteredKeys, parentTypeCache)
return fieldKeys, paths, rowCount <= limit, nil
}
// buildJSONPlans builds JSON access plans for the given keys
// using the provided parent type cache (pre-fetched in the main UNION query).
func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetrytypes.TelemetryFieldKey, typeCache map[string][]telemetrytypes.FieldDataType) error {
if len(keys) == 0 {
return nil
func (t *telemetryMetaStore) buildBodyJSONPaths(ctx context.Context,
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
fieldKeys, paths, finished, err := t.fetchBodyJSONPaths(ctx, fieldKeySelectors)
if err != nil {
return nil, false, err
}
columnMeta := t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody]
promoted, err := t.GetPromotedPaths(ctx, paths...)
if err != nil {
return nil, false, err
}
indexes, err := t.getJSONPathIndexes(ctx, paths...)
if err != nil {
return nil, false, err
}
for _, fieldKey := range fieldKeys {
promotedKey := strings.Split(fieldKey.Name, telemetrytypes.ArraySep)[0]
fieldKey.Materialized = promoted[promotedKey]
fieldKey.Indexes = indexes[fieldKey.Name]
}
return fieldKeys, finished, t.buildJSONPlans(ctx, fieldKeys)
}
func (t *telemetryMetaStore) buildJSONPlans(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey) error {
parentSelectors := make([]*telemetrytypes.FieldKeySelector, 0, len(keys))
for _, key := range keys {
if err := key.SetJSONAccessPlan(columnMeta, typeCache); err != nil {
parentSelectors = append(parentSelectors, key.ArrayParentSelectors()...)
}
parentKeys, _, _, err := t.fetchBodyJSONPaths(ctx, parentSelectors)
if err != nil {
return err
}
typeCache := make(map[string][]telemetrytypes.JSONDataType)
for _, key := range parentKeys {
typeCache[key.Name] = append(typeCache[key.Name], *key.JSONDataType)
}
// build plans for keys now
for _, key := range keys {
err = key.SetJSONAccessPlan(t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody], typeCache)
if err != nil {
return err
}
}
@@ -94,6 +155,51 @@ func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetry
return nil
}
func buildGetBodyJSONPathsQuery(fieldKeySelectors []*telemetrytypes.FieldKeySelector) (string, []any, int) {
if len(fieldKeySelectors) == 0 {
return "", nil, defaultPathLimit
}
from := fmt.Sprintf("%s.%s", DBName, PathTypesTableName)
// Build a better query using GROUP BY to deduplicate at database level
// This aggregates all types per path and gets the max last_seen, then applies LIMIT
sb := sqlbuilder.Select(
"path",
"groupArray(DISTINCT type) AS types",
"max(last_seen) AS last_seen",
).From(from)
limit := 0
// Add search filter if provided
orClauses := []string{}
for _, fieldKeySelector := range fieldKeySelectors {
// replace [*] with []
fieldKeySelector.Name = strings.ReplaceAll(fieldKeySelector.Name, telemetrytypes.ArrayAnyIndex, telemetrytypes.ArraySep)
// Extract search text for body JSON keys
keyName := CleanPathPrefixes(fieldKeySelector.Name)
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
orClauses = append(orClauses, sb.Equal("path", keyName))
} else {
// Pattern matching for metadata API (defaults to LIKE behavior for other operators)
orClauses = append(orClauses, sb.ILike("path", fmt.Sprintf("%%%s%%", querybuilder.FormatValueForContains(keyName))))
}
limit += fieldKeySelector.Limit
}
sb.Where(sb.Or(orClauses...))
// Group by path to get unique paths with aggregated types
sb.GroupBy("path")
// Order by max last_seen to get most recent paths first
sb.OrderBy("last_seen DESC")
if limit == 0 {
limit = defaultPathLimit
}
sb.Limit(limit)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
return query, args, limit
}
func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...string) (map[string][]telemetrytypes.JSONDataTypeIndex, error) {
filteredPaths := []string{}
for _, path := range paths {

View File

@@ -7,9 +7,99 @@ import (
"github.com/SigNoz/signoz-otel-collector/constants"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrylogs"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/stretchr/testify/require"
)
func TestBuildGetBodyJSONPathsQuery(t *testing.T) {
testCases := []struct {
name string
fieldKeySelectors []*telemetrytypes.FieldKeySelector
expectedSQL string
expectedArgs []any
expectedLimit int
}{
{
name: "Single search text with EQUAL operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user.name",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"user.name", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Single search text with LIKE operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%user%", 100},
expectedLimit: 100,
},
{
name: "Multiple search texts with EQUAL operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user.name",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
{
Name: "user.age",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ? OR path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"user.name", "user.age", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Multiple search texts with LIKE operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
{
Name: "admin",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?) OR LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%user%", "%admin%", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Search with Contains operator (should default to LIKE)",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "test",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%test%", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
query, args, limit := buildGetBodyJSONPathsQuery(tc.fieldKeySelectors)
require.Equal(t, tc.expectedSQL, query)
require.Equal(t, tc.expectedArgs, args)
require.Equal(t, tc.expectedLimit, limit)
})
}
}
func TestBuildListLogsJSONIndexesQuery(t *testing.T) {
testCases := []struct {
name string

View File

@@ -368,19 +368,6 @@ func (t *telemetryMetaStore) logsTblStatementToFieldKeys(ctx context.Context) ([
return materialisedKeys, nil
}
// logKeysUnionArm declares one arm of the UNION ALL in getLogsKeys.
// All per-table variance is captured here so the loop body can stay uniform.
type logKeysUnionArm struct {
shouldQuery bool
fieldContext telemetrytypes.FieldContext
table string
dataTypeColumn string // column used in WHERE/GROUP BY
dataTypeSelectExpr string // expression used in SELECT (may wrap with lower())
addBaseFilters func(sb *sqlbuilder.SelectBuilder) // mandatory WHERE filters (e.g., signal, field_context)
encodeDataType func(telemetrytypes.FieldDataType) string // how to render a FieldDataType in WHERE values
extraOrBranch func(sb *sqlbuilder.SelectBuilder) string // optional extra OR branch (e.g., body parent-types)
}
// getLogsKeys returns the keys from the spans that match the field selection criteria.
func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
@@ -410,149 +397,90 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
// tables to query based on field selectors
queryAttributeTable := false
queryResourceTable := false
queryBodyTable := false
for _, selector := range fieldKeySelectors {
if selector.FieldContext == telemetrytypes.FieldContextUnspecified {
// unspecified context, query all tables
// unspecified context, query both tables
queryAttributeTable = true
queryResourceTable = true
queryBodyTable = true
break
} else if selector.FieldContext == telemetrytypes.FieldContextAttribute {
queryAttributeTable = true
} else if selector.FieldContext == telemetrytypes.FieldContextResource {
queryResourceTable = true
} else if selector.FieldContext == telemetrytypes.FieldContextBody && querybuilder.BodyJSONQueryEnabled {
queryBodyTable = true
}
}
// body keys are gated behind the feature flag
queryBodyTable = queryBodyTable && querybuilder.BodyJSONQueryEnabled
// pre-compute parent array path names from body selectors for JSON plan building;
// these will be fetched as a separate UNION arm filtered to ArrayJSON/ArrayDynamic only
parentPaths := make(map[string]bool)
if queryBodyTable {
for _, sel := range fieldKeySelectors {
if sel.FieldContext != telemetrytypes.FieldContextBody &&
sel.FieldContext != telemetrytypes.FieldContextUnspecified {
continue
}
if !strings.Contains(sel.Name, telemetrytypes.ArraySep) {
continue
}
key := &telemetrytypes.TelemetryFieldKey{
Name: sel.Name,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
}
for _, ps := range key.ArrayParentSelectors() {
parentPaths[ps.Name] = true
}
}
tablesToQuery := []struct {
fieldContext telemetrytypes.FieldContext
shouldQuery bool
}{
{telemetrytypes.FieldContextAttribute, queryAttributeTable},
{telemetrytypes.FieldContextResource, queryResourceTable},
}
// Each UNION arm differs only in: table, data-type column name and SELECT
// expression (lower-wrapped for historical mixed-case in attr/resource),
// base WHERE filters, the per-selector data-type encoding, and (for body)
// an extra OR branch that fetches parent array types for JSON plan building.
// All other logic is shared by the loop below.
tablesToQuery := []logKeysUnionArm{
{
shouldQuery: queryAttributeTable,
fieldContext: telemetrytypes.FieldContextAttribute,
table: t.logsDBName + "." + t.logAttributeKeysTblName,
dataTypeColumn: "datatype",
dataTypeSelectExpr: "lower(datatype)",
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
},
{
shouldQuery: queryResourceTable,
fieldContext: telemetrytypes.FieldContextResource,
table: t.logsDBName + "." + t.logResourceKeysTblName,
dataTypeColumn: "datatype",
dataTypeSelectExpr: "lower(datatype)",
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
},
{
shouldQuery: queryBodyTable,
fieldContext: telemetrytypes.FieldContextBody,
table: fmt.Sprintf("%s.%s", DBName, FieldKeysTable),
dataTypeColumn: "field_data_type",
dataTypeSelectExpr: "field_data_type",
addBaseFilters: func(sb *sqlbuilder.SelectBuilder) {
sb.Where(sb.E("signal", telemetrytypes.SignalLogs.StringValue()))
sb.Where(sb.E("field_context", telemetrytypes.FieldContextBody.StringValue()))
},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.StringValue() },
extraOrBranch: func(sb *sqlbuilder.SelectBuilder) string {
if len(parentPaths) == 0 {
return ""
}
names := make([]any, 0, len(parentPaths))
for n := range parentPaths {
names = append(names, n)
}
return sb.And(
sb.In("name", names...),
sb.In("field_data_type",
telemetrytypes.FieldDataTypeArrayDynamic.StringValue(),
telemetrytypes.FieldDataTypeArrayJSON.StringValue(),
),
)
},
},
}
for _, arm := range tablesToQuery {
if !arm.shouldQuery {
for _, table := range tablesToQuery {
if !table.shouldQuery {
continue
}
fieldContext := table.fieldContext
// table name based on field context
var tblName string
if fieldContext == telemetrytypes.FieldContextAttribute {
tblName = t.logsDBName + "." + t.logAttributeKeysTblName
} else {
tblName = t.logsDBName + "." + t.logResourceKeysTblName
}
sb := sqlbuilder.Select(
"name AS tag_key",
fmt.Sprintf("'%s' AS tag_type", arm.fieldContext.TagType()),
arm.dataTypeSelectExpr+" AS tag_data_type",
fmt.Sprintf("%d AS priority", getPriorityForContext(arm.fieldContext)),
).From(arm.table)
fmt.Sprintf("'%s' AS tag_type", fieldContext.TagType()),
"lower(datatype) AS tag_data_type", // in logs, we had some historical data with capital and small case
fmt.Sprintf(`%d AS priority`, getPriorityForContext(fieldContext)),
).From(tblName)
arm.addBaseFilters(sb)
var limit int
conds := []string{}
branches := []string{}
for _, sel := range fieldKeySelectors {
if sel.FieldContext != telemetrytypes.FieldContextUnspecified && sel.FieldContext != arm.fieldContext {
for _, fieldKeySelector := range fieldKeySelectors {
// Include this selector if:
// 1. It has unspecified context (matches all tables)
// 2. Its context matches the current table's context
if fieldKeySelector.FieldContext != telemetrytypes.FieldContextUnspecified &&
fieldKeySelector.FieldContext != fieldContext {
continue
}
parts := []string{}
if sel.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
parts = append(parts, sb.E("name", sel.Name))
// key part of the selector
fieldKeyConds := []string{}
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
fieldKeyConds = append(fieldKeyConds, sb.E("name", fieldKeySelector.Name))
} else {
parts = append(parts, sb.ILike("name", "%"+escapeForLike(sel.Name)+"%"))
fieldKeyConds = append(fieldKeyConds, sb.ILike("name", "%"+escapeForLike(fieldKeySelector.Name)+"%"))
}
if sel.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
parts = append(parts, sb.E(arm.dataTypeColumn, arm.encodeDataType(sel.FieldDataType)))
// now look at the field data type
if fieldKeySelector.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
fieldKeyConds = append(fieldKeyConds, sb.E("datatype", fieldKeySelector.FieldDataType.TagDataType()))
}
if len(parts) > 0 {
branches = append(branches, sb.And(parts...))
if len(fieldKeyConds) > 0 {
conds = append(conds, sb.And(fieldKeyConds...))
}
limit += fieldKeySelector.Limit
}
if extra := arm.extraOrBranch(sb); extra != "" {
branches = append(branches, extra)
if len(conds) > 0 {
sb.Where(sb.Or(conds...))
}
if len(branches) > 0 {
sb.Where(sb.Or(branches...))
sb.GroupBy("name", "datatype")
if limit == 0 {
limit = 1000
}
sb.GroupBy("name", arm.dataTypeColumn)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
queries = append(queries, query)
allArgs = append(allArgs, args...)
@@ -589,7 +517,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
defer rows.Close()
keys := []*telemetrytypes.TelemetryFieldKey{}
parentTypeCache := make(map[string][]telemetrytypes.FieldDataType)
rowCount := 0
searchTexts := []string{}
@@ -613,17 +540,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
if err != nil {
return nil, false, errors.Wrap(err, errors.TypeInternal, errors.CodeInternal, ErrFailedToGetLogsKeys.Error())
}
// body keys with ArrayJSON/ArrayDynamic types are internal container types
// used only for JSON access plan building; route to parentTypeCache, not to results
switch fieldDataType {
case telemetrytypes.FieldDataTypeArrayJSON, telemetrytypes.FieldDataTypeArrayDynamic:
if fieldContext == telemetrytypes.FieldContextBody && parentPaths[name] {
parentTypeCache[name] = append(parentTypeCache[name], fieldDataType)
continue
}
}
key, ok := mapOfKeys[name+";"+fieldContext.StringValue()+";"+fieldDataType.StringValue()]
// if there is no materialised column, create a key with the field context and data type
@@ -677,11 +593,13 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
}
}
// enrich body keys with promoted paths, indexes, and JSON access plans
if querybuilder.BodyJSONQueryEnabled {
if err := t.enrichBodyKeys(ctx, keys, parentTypeCache); err != nil {
t.logger.ErrorContext(ctx, "failed to enrich body JSON keys", errors.Attr(err))
bodyJSONPaths, finished, err := t.buildBodyJSONPaths(ctx, fieldKeySelectors) // LIKE for pattern matching
if err != nil {
t.logger.ErrorContext(ctx, "failed to extract body JSON paths", errors.Attr(err))
}
keys = append(keys, bodyJSONPaths...)
complete = complete && finished
}
if _, err := t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {

View File

@@ -1,11 +1,13 @@
package telemetrymetadata
import otelcollectorconst "github.com/SigNoz/signoz-otel-collector/constants"
const (
DBName = "signoz_metadata"
AttributesMetadataTableName = "distributed_attributes_metadata"
AttributesMetadataLocalTableName = "attributes_metadata"
ColumnEvolutionMetadataTableName = "distributed_column_evolution_metadata"
FieldKeysTable = "distributed_field_keys"
PathTypesTableName = otelcollectorconst.DistributedPathTypesTable
// Column Evolution table stores promoted paths as (signal, column_name, field_context, field_name); see signoz-otel-collector metadata_migrations.
PromotedPathsTableName = "distributed_column_evolution_metadata"
SkipIndexTableName = "system.data_skipping_indices"

View File

@@ -1,6 +1,50 @@
package telemetrytraces
import "github.com/SigNoz/signoz/pkg/types/telemetrytypes"
import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
const (
// Internal Columns.
SpanTimestampBucketStartColumn = "ts_bucket_start"
SpanResourceFingerPrintColumn = "resource_fingerprint"
// Intrinsic Columns.
SpanTimestampColumn = "timestamp"
SpanTraceIDColumn = "trace_id"
SpanSpanIDColumn = "span_id"
SpanTraceStateColumn = "trace_state"
SpanParentSpanIDColumn = "parent_span_id"
SpanFlagsColumn = "flags"
SpanNameColumn = "name"
SpanKindColumn = "kind"
SpanKindStringColumn = "kind_string"
SpanDurationNanoColumn = "duration_nano"
SpanStatusCodeColumn = "status_code"
SpanStatusMessageColumn = "status_message"
SpanStatusCodeStringColumn = "status_code_string"
SpanEventsColumn = "events"
SpanLinksColumn = "links"
// Calculated Columns.
SpanResponseStatusCodeColumn = "response_status_code"
SpanExternalHTTPURLColumn = "external_http_url"
SpanHTTPURLColumn = "http_url"
SpanExternalHTTPMethodColumn = "external_http_method"
SpanHTTPMethodColumn = "http_method"
SpanHTTPHostColumn = "http_host"
SpanDBNameColumn = "db_name"
SpanDBOperationColumn = "db_operation"
SpanHasErrorColumn = "has_error"
SpanIsRemoteColumn = "is_remote"
// Contextual Columns.
SpanAttributesStringColumn = "attributes_string"
SpanAttributesNumberColumn = "attributes_number"
SpanAttributesBoolColumn = "attributes_bool"
SpanResourcesStringColumn = "resources_string"
)
var (
IntrinsicFields = map[string]telemetrytypes.TelemetryFieldKey{

View File

@@ -78,6 +78,16 @@ func TestGetFieldKeyName(t *testing.T) {
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
expectedError: nil,
},
{
name: "Contextual map column - attributes_string without span context does not short-circuit",
key: telemetrytypes.TelemetryFieldKey{
Name: SpanAttributesStringColumn,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
expectedResult: "attributes_string['attributes_string']",
expectedError: nil,
},
{
name: "Non-existent column",
key: telemetrytypes.TelemetryFieldKey{

View File

@@ -4,7 +4,6 @@ import (
"context"
"fmt"
"log/slog"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
@@ -15,7 +14,6 @@ import (
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/huandu/go-sqlbuilder"
"golang.org/x/exp/maps"
)
var (
@@ -86,40 +84,12 @@ func (b *traceQueryStatementBuilder) Build(
return nil, err
}
/*
Adding a tech debt note here:
This piece of code is a hot fix and should be removed once we close issue: engineering-pod/issues/3622
*/
/*
-------------------------------- Start of tech debt ----------------------------
*/
isSelectFieldsEmpty := false
if requestType == qbtypes.RequestTypeRaw {
selectedFields := query.SelectFields
if len(selectedFields) == 0 {
sortedKeys := maps.Keys(DefaultFields)
slices.Sort(sortedKeys)
for _, key := range sortedKeys {
selectedFields = append(selectedFields, DefaultFields[key])
}
query.SelectFields = selectedFields
}
selectFieldKeys := []string{}
for _, field := range selectedFields {
selectFieldKeys = append(selectFieldKeys, field.Name)
}
for _, x := range []string{"timestamp", "span_id", "trace_id"} {
if !slices.Contains(selectFieldKeys, x) {
query.SelectFields = append(query.SelectFields, DefaultFields[x])
}
}
// we are expanding here to ensure that all the conflicts are taken care in adjustKeys
// i.e if there is a conflict we strip away context of the key in adjustKeys
query, isSelectFieldsEmpty = b.expandRawSelectFields(query)
}
/*
-------------------------------- End of tech debt ----------------------------
*/
query = b.adjustKeys(ctx, keys, query, requestType)
@@ -128,7 +98,7 @@ func (b *traceQueryStatementBuilder) Build(
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, q, query, start, end, keys, variables)
return b.buildListQuery(ctx, q, query, start, end, keys, variables, isSelectFieldsEmpty)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, q, query, start, end, keys, variables)
case qbtypes.RequestTypeScalar:
@@ -292,6 +262,7 @@ func (b *traceQueryStatementBuilder) buildListQuery(
start, end uint64,
keys map[string][]*telemetrytypes.TelemetryFieldKey,
variables map[string]qbtypes.VariableItem,
isSelectFieldsEmpty bool,
) (*qbtypes.Statement, error) {
var (
@@ -306,7 +277,6 @@ func (b *traceQueryStatementBuilder) buildListQuery(
cteArgs = append(cteArgs, args)
}
// TODO: should we deprecate `SelectFields` and return everything from a span like we do for logs?
for _, field := range query.SelectFields {
colExpr, err := b.fm.ColumnExpressionFor(ctx, start, end, &field, keys)
if err != nil {
@@ -315,6 +285,13 @@ func (b *traceQueryStatementBuilder) buildListQuery(
sb.SelectMore(colExpr)
}
if isSelectFieldsEmpty {
sb.SelectMore(SpanAttributesStringColumn)
sb.SelectMore(SpanAttributesNumberColumn)
sb.SelectMore(SpanAttributesBoolColumn)
sb.SelectMore(SpanResourcesStringColumn)
}
// From table
sb.From(fmt.Sprintf("%s.%s", DBName, SpanIndexV3TableName))
@@ -838,3 +815,52 @@ func (b *traceQueryStatementBuilder) buildResourceFilterCTE(
variables,
)
}
// expandRawSelectFields populates SelectFields for raw (list view) queries.
// It must be called before adjustKeys so that normalization runs over the full set.
// Returns the updated query and whether the original SelectFields was empty (i.e. full expansion was performed).
func (b *traceQueryStatementBuilder) expandRawSelectFields(query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]) (qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation], bool) {
wasEmpty := len(query.SelectFields) == 0
selectFields := []telemetrytypes.TelemetryFieldKey{
{Name: SpanTimestampColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanTraceIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
{Name: SpanSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan},
}
if wasEmpty {
// Select all intrinsic columns
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanTraceStateColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanParentSpanIDColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanFlagsColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanNameColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanKindColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanKindStringColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanDurationNanoColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanStatusMessageColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanStatusCodeStringColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanEventsColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanLinksColumn, FieldContext: telemetrytypes.FieldContextSpan})
// select all calculated columns
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanResponseStatusCodeColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanExternalHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanHTTPURLColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanExternalHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanHTTPMethodColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanHTTPHostColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanDBNameColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanDBOperationColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanHasErrorColumn, FieldContext: telemetrytypes.FieldContextSpan})
selectFields = append(selectFields, telemetrytypes.TelemetryFieldKey{Name: SpanIsRemoteColumn, FieldContext: telemetrytypes.FieldContextSpan})
} else {
for _, field := range query.SelectFields {
// TODO(tvats): If a user specifies attribute.timestamp in the select fields, this loop will basically ignore it, as we already added a field by default. This can be fixed once we close https://github.com/SigNoz/engineering-pod/issues/3693
if field.Name == SpanTimestampColumn || field.Name == SpanTraceIDColumn || field.Name == SpanSpanIDColumn {
continue
}
selectFields = append(selectFields, field)
}
}
query.SelectFields = selectFields
return query, wasEmpty
}

View File

@@ -436,7 +436,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
},
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, duration_nano AS `duration_nano`, `attribute_number_cart$$items_count` AS `cart.items_count` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -465,7 +465,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY attributes_string['user.id'] AS `user.id` desc LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY attributes_string['user.id'] AS `user.id` desc LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -509,7 +509,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, response_status_code AS `responseStatusCode` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -553,7 +553,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, multiIf(toString(`attribute_string_mixed$$materialization$$key`) != '', toString(`attribute_string_mixed$$materialization$$key`), toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)) != '', toString(multiIf(resource.`mixed.materialization.key` IS NOT NULL, resource.`mixed.materialization.key`::String, mapContains(resources_string, 'mixed.materialization.key'), resources_string['mixed.materialization.key'], NULL)), NULL) AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -598,7 +598,7 @@ func TestStatementBuilderListQuery(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key`, timestamp AS `timestamp`, span_id AS `span_id`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE (simpleJSONExtractString(labels, 'service.name') = ? AND labels LIKE ? AND labels LIKE ?) AND seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, name AS `name`, resource_string_service$$name AS `serviceName`, duration_nano AS `durationNano`, http_method AS `httpMethod`, `attribute_string_mixed$$materialization$$key` AS `mixed.materialization.key` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND true AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{"redis-manual", "%service.name%", "%service.name\":\"redis-manual%", uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -706,7 +706,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
Limit: 10,
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? LIMIT ?",
Args: []any{uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,
@@ -739,7 +739,7 @@ func TestStatementBuilderListQueryWithCorruptData(t *testing.T) {
}},
},
expected: qbtypes.Statement{
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT duration_nano AS `duration_nano`, name AS `name`, response_status_code AS `response_status_code`, multiIf(resource.`service.name` IS NOT NULL, resource.`service.name`::String, mapContains(resources_string, 'service.name'), resources_string['service.name'], NULL) AS `service.name`, span_id AS `span_id`, timestamp AS `timestamp`, trace_id AS `trace_id` FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Query: "WITH __resource_filter AS (SELECT fingerprint FROM signoz_traces.distributed_traces_v3_resource WHERE seen_at_ts_bucket_start >= ? AND seen_at_ts_bucket_start <= ?) SELECT timestamp AS `timestamp`, trace_id AS `trace_id`, span_id AS `span_id`, trace_state AS `trace_state`, parent_span_id AS `parent_span_id`, flags AS `flags`, name AS `name`, kind AS `kind`, kind_string AS `kind_string`, duration_nano AS `duration_nano`, status_code AS `status_code`, status_message AS `status_message`, status_code_string AS `status_code_string`, events AS `events`, links AS `links`, response_status_code AS `response_status_code`, external_http_url AS `external_http_url`, http_url AS `http_url`, external_http_method AS `external_http_method`, http_method AS `http_method`, http_host AS `http_host`, db_name AS `db_name`, db_operation AS `db_operation`, has_error AS `has_error`, is_remote AS `is_remote`, attributes_string, attributes_number, attributes_bool, resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE resource_fingerprint GLOBAL IN (SELECT fingerprint FROM __resource_filter) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? ORDER BY timestamp AS `timestamp` asc LIMIT ?",
Args: []any{uint64(1747945619), uint64(1747983448), "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10},
},
expectedErr: nil,

View File

@@ -37,6 +37,7 @@ type TelemetryFieldKey struct {
FieldContext FieldContext `json:"fieldContext,omitzero"`
FieldDataType FieldDataType `json:"fieldDataType,omitzero"`
JSONDataType *JSONDataType `json:"-"`
JSONPlan JSONAccessPlan `json:"-"`
Indexes []JSONDataTypeIndex `json:"-"`
Materialized bool `json:"-"` // refers to promoted in case of body.... fields
@@ -79,12 +80,6 @@ func (f *TelemetryFieldKey) ArrayParentSelectors() []*FieldKeySelector {
return selectors
}
// GetJSONDataType derives the JSONDataType from FieldDataType.
// Callers should check FieldDataType != FieldDataTypeUnspecified before calling.
func (f *TelemetryFieldKey) GetJSONDataType() JSONDataType {
return MappingFieldDataTypeToJSONDataType[f.FieldDataType]
}
func (f TelemetryFieldKey) String() string {
var sb strings.Builder
fmt.Fprintf(&sb, "name=%s", f.Name)
@@ -97,6 +92,9 @@ func (f TelemetryFieldKey) String() string {
if f.Materialized {
sb.WriteString(",materialized=true")
}
if f.JSONDataType != nil {
fmt.Fprintf(&sb, ",jsondatatype=%s", f.JSONDataType.StringValue())
}
if len(f.Indexes) > 0 {
sb.WriteString(",indexes=[")
for i, index := range f.Indexes {
@@ -119,6 +117,7 @@ func (f TelemetryFieldKey) Text() string {
func (f *TelemetryFieldKey) OverrideMetadataFrom(src *TelemetryFieldKey) {
f.FieldContext = src.FieldContext
f.FieldDataType = src.FieldDataType
f.JSONDataType = src.JSONDataType
f.Indexes = src.Indexes
f.Materialized = src.Materialized
f.JSONPlan = src.JSONPlan

View File

@@ -31,7 +31,7 @@ var (
FieldDataTypeArrayInt64 = FieldDataType{valuer.NewString("[]int64")}
FieldDataTypeArrayNumber = FieldDataType{valuer.NewString("[]number")}
FieldDataTypeArrayJSON = FieldDataType{valuer.NewString("[]object")}
FieldDataTypeArrayObject = FieldDataType{valuer.NewString("[]object")}
FieldDataTypeArrayDynamic = FieldDataType{valuer.NewString("[]dynamic")}
// Map string representations to FieldDataType values
@@ -72,8 +72,6 @@ var (
"[]float64": FieldDataTypeArrayFloat64,
"[]number": FieldDataTypeArrayNumber,
"[]bool": FieldDataTypeArrayBool,
"[]json": FieldDataTypeArrayJSON,
"[]dynamic": FieldDataTypeArrayDynamic,
// c-style array types
"string[]": FieldDataTypeArrayString,
@@ -81,8 +79,6 @@ var (
"float64[]": FieldDataTypeArrayFloat64,
"number[]": FieldDataTypeArrayNumber,
"bool[]": FieldDataTypeArrayBool,
"json[]": FieldDataTypeArrayJSON,
"dynamic[]": FieldDataTypeArrayDynamic,
}
fieldDataTypeToCHDataType = map[FieldDataType]string{

View File

@@ -43,7 +43,7 @@ type JSONAccessNode struct {
isRoot bool // marked true for only body_v2 and body_promoted
// Precomputed type information (single source of truth)
AvailableTypes []FieldDataType
AvailableTypes []JSONDataType
// Array type branches (Array(JSON) vs Array(Dynamic))
Branches map[JSONAccessBranchType]*JSONAccessNode
@@ -106,7 +106,7 @@ type planBuilder struct {
paths []string // cumulative paths for type cache lookups
segments []string // individual path segments for node names
isPromoted bool
typeCache map[string][]FieldDataType
typeCache map[string][]JSONDataType
}
// buildPlan recursively builds the path plan tree.
@@ -155,14 +155,14 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
MaxDynamicPaths: maxPaths,
}
hasJSON := slices.Contains(node.AvailableTypes, FieldDataTypeArrayJSON)
hasDynamic := slices.Contains(node.AvailableTypes, FieldDataTypeArrayDynamic)
hasJSON := slices.Contains(node.AvailableTypes, ArrayJSON)
hasDynamic := slices.Contains(node.AvailableTypes, ArrayDynamic)
// Configure terminal if this is the last part
if isTerminal {
node.TerminalConfig = &TerminalConfig{
Key: pb.key,
ElemType: pb.key.GetJSONDataType(),
ElemType: *pb.key.JSONDataType,
}
} else {
var err error
@@ -185,7 +185,7 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
// buildJSONAccessPlan builds a tree structure representing the complete JSON path traversal
// that precomputes all possible branches and their types.
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]FieldDataType,
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]JSONDataType,
) error {
// if path is empty, return nil
if key.Name == "" {

View File

@@ -19,11 +19,11 @@ const (
// ============================================================================
// makeKey creates a TelemetryFieldKey for testing.
func makeKey(name string, dataType FieldDataType, materialized bool) *TelemetryFieldKey {
func makeKey(name string, dataType JSONDataType, materialized bool) *TelemetryFieldKey {
return &TelemetryFieldKey{
Name: name,
FieldDataType: dataType,
Materialized: materialized,
Name: name,
JSONDataType: &dataType,
Materialized: materialized,
}
}
@@ -242,7 +242,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
}{
{
name: "Simple path not promoted",
key: makeKey("user.name", FieldDataTypeString, false),
key: makeKey("user.name", String, false),
expectedYAML: fmt.Sprintf(`
- name: user.name
column: %s
@@ -255,7 +255,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
},
{
name: "Simple path promoted",
key: makeKey("user.name", FieldDataTypeString, true),
key: makeKey("user.name", String, true),
expectedYAML: fmt.Sprintf(`
- name: user.name
column: %s
@@ -276,7 +276,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
},
{
name: "Empty path returns error",
key: makeKey("", FieldDataTypeString, false),
key: makeKey("", String, false),
expectErr: true,
expectedYAML: "",
},
@@ -431,7 +431,7 @@ func TestPlanJSON_ArrayPaths(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
key := makeKey(tt.path, FieldDataTypeString, false)
key := makeKey(tt.path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -450,7 +450,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
path := "education[].awards[].type"
t.Run("Non-promoted plan", func(t *testing.T) {
key := makeKey(path, FieldDataTypeString, false)
key := makeKey(path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -493,7 +493,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
})
t.Run("Promoted plan", func(t *testing.T) {
key := makeKey(path, FieldDataTypeString, true)
key := makeKey(path, String, true)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -666,12 +666,12 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Choose key type based on path; operator does not affect the tree shape asserted here.
keyType := FieldDataTypeString
keyType := String
switch tt.path {
case "education":
keyType = FieldDataTypeArrayJSON
keyType = ArrayJSON
case "education[].type":
keyType = FieldDataTypeString
keyType = String
}
key := makeKey(tt.path, keyType, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
@@ -692,7 +692,7 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
func TestPlanJSON_TreeStructure(t *testing.T) {
types, _ := TestJSONTypeSet()
path := "education[].awards[].participated[].team[].branch"
key := makeKey(path, FieldDataTypeString, false)
key := makeKey(path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,

View File

@@ -46,6 +46,14 @@ var MappingStringToJSONDataType = map[string]JSONDataType{
"Array(JSON)": ArrayJSON,
}
var ScalerTypeToArrayType = map[JSONDataType]JSONDataType{
String: ArrayString,
Int64: ArrayInt64,
Float64: ArrayFloat64,
Bool: ArrayBool,
Dynamic: ArrayDynamic,
}
var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
FieldDataTypeString: String,
FieldDataTypeInt64: Int64,
@@ -55,8 +63,18 @@ var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
FieldDataTypeArrayString: ArrayString,
FieldDataTypeArrayInt64: ArrayInt64,
FieldDataTypeArrayFloat64: ArrayFloat64,
FieldDataTypeArrayNumber: ArrayFloat64,
FieldDataTypeArrayBool: ArrayBool,
FieldDataTypeArrayDynamic: ArrayDynamic,
FieldDataTypeArrayJSON: ArrayJSON,
}
var MappingJSONDataTypeToFieldDataType = map[JSONDataType]FieldDataType{
String: FieldDataTypeString,
Int64: FieldDataTypeInt64,
Float64: FieldDataTypeFloat64,
Bool: FieldDataTypeBool,
ArrayString: FieldDataTypeArrayString,
ArrayInt64: FieldDataTypeArrayInt64,
ArrayFloat64: FieldDataTypeArrayFloat64,
ArrayBool: FieldDataTypeArrayBool,
ArrayDynamic: FieldDataTypeArrayDynamic,
ArrayJSON: FieldDataTypeArrayObject,
}

View File

@@ -4,69 +4,69 @@ package telemetrytypes
// Test JSON Type Set Data Setup
// ============================================================================
// TestJSONTypeSet returns a map of path->field data types for testing.
// TestJSONTypeSet returns a map of path->types for testing.
// This represents the type information available in the test JSON structure.
func TestJSONTypeSet() (map[string][]FieldDataType, MetadataStore) {
types := map[string][]FieldDataType{
func TestJSONTypeSet() (map[string][]JSONDataType, MetadataStore) {
types := map[string][]JSONDataType{
// ── user (primitives) ─────────────────────────────────────────────
"user.name": {FieldDataTypeString},
"user.permissions": {FieldDataTypeArrayString},
"user.age": {FieldDataTypeInt64, FieldDataTypeString}, // Int64/String ambiguity
"user.height": {FieldDataTypeFloat64},
"user.active": {FieldDataTypeBool}, // Bool — not IndexSupported
"user.name": {String},
"user.permissions": {ArrayString},
"user.age": {Int64, String}, // Int64/String ambiguity
"user.height": {Float64},
"user.active": {Bool}, // Bool — not IndexSupported
// Deeper non-array nesting (a.b.c — no array hops)
"user.address.zip": {FieldDataTypeInt64},
"user.address.zip": {Int64},
// ── education[] ───────────────────────────────────────────────────
// Pattern: x[].y
"education": {FieldDataTypeArrayJSON},
"education[].name": {FieldDataTypeString},
"education[].type": {FieldDataTypeString, FieldDataTypeInt64},
"education[].year": {FieldDataTypeInt64},
"education[].scores": {FieldDataTypeArrayInt64},
"education[].parameters": {FieldDataTypeArrayFloat64, FieldDataTypeArrayDynamic},
"education": {ArrayJSON},
"education[].name": {String},
"education[].type": {String, Int64},
"education[].year": {Int64},
"education[].scores": {ArrayInt64},
"education[].parameters": {ArrayFloat64, ArrayDynamic},
// Pattern: x[].y[]
"education[].awards": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
"education[].awards": {ArrayDynamic, ArrayJSON},
// Pattern: x[].y[].z
"education[].awards[].name": {FieldDataTypeString},
"education[].awards[].type": {FieldDataTypeString},
"education[].awards[].semester": {FieldDataTypeInt64},
"education[].awards[].name": {String},
"education[].awards[].type": {String},
"education[].awards[].semester": {Int64},
// Pattern: x[].y[].z[]
"education[].awards[].participated": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
"education[].awards[].participated": {ArrayDynamic, ArrayJSON},
// Pattern: x[].y[].z[].w
"education[].awards[].participated[].members": {FieldDataTypeArrayString},
"education[].awards[].participated[].members": {ArrayString},
// Pattern: x[].y[].z[].w[]
"education[].awards[].participated[].team": {FieldDataTypeArrayJSON},
"education[].awards[].participated[].team": {ArrayJSON},
// Pattern: x[].y[].z[].w[].v
"education[].awards[].participated[].team[].branch": {FieldDataTypeString},
"education[].awards[].participated[].team[].branch": {String},
// ── interests[] ───────────────────────────────────────────────────
"interests": {FieldDataTypeArrayJSON},
"interests[].entities": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {FieldDataTypeString},
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {FieldDataTypeArrayInt64, FieldDataTypeArrayString},
"http-events": {FieldDataTypeArrayJSON},
"http-events[].request-info.host": {FieldDataTypeString},
"ids": {FieldDataTypeArrayDynamic},
"interests": {ArrayJSON},
"interests[].entities": {ArrayJSON},
"interests[].entities[].reviews": {ArrayJSON},
"interests[].entities[].reviews[].entries": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {String},
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {ArrayInt64, ArrayString},
"http-events": {ArrayJSON},
"http-events[].request-info.host": {String},
"ids": {ArrayDynamic},
// ── top-level primitives ──────────────────────────────────────────
"message": {FieldDataTypeString},
"http-status": {FieldDataTypeInt64, FieldDataTypeString}, // hyphen in root key, ambiguous
"message": {String},
"http-status": {Int64, String}, // hyphen in root key, ambiguous
// ── top-level nested objects (no array hops) ───────────────────────
"response.time-taken": {FieldDataTypeFloat64}, // hyphen inside nested key
"response.time-taken": {Float64}, // hyphen inside nested key
}
return types, nil

View File

@@ -490,25 +490,24 @@ def test_traces_list(
"name": "A",
"signal": "traces",
"disabled": False,
"selectFields": [
{"name": "span_id"},
{"name": "span.timestamp"},
{"name": "trace_id"},
],
"order": [{"key": {"name": "timestamp"}, "direction": "desc"}],
"limit": 1,
},
},
HTTPStatus.OK,
lambda x: [
x[3].duration_nano,
x[3].name,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
format_timestamp(x[3].timestamp),
x[3].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 2: order by attribute timestamp field which is there in attributes as well
# This should break but it doesn't because attribute.timestamp gets adjusted to timestamp
# because of default trace.timestamp gets added by default and bug in field mapper picks
# instrinsic field
# attribute.timestamp gets adjusted to span.timestamp
pytest.param(
{
"type": "builder_query",
@@ -516,6 +515,11 @@ def test_traces_list(
"name": "A",
"signal": "traces",
"disabled": False,
"selectFields": [
{"name": "span_id"},
{"name": "span.timestamp"},
{"name": "trace_id"},
],
"order": [
{"key": {"name": "attribute.timestamp"}, "direction": "desc"}
],
@@ -524,10 +528,6 @@ def test_traces_list(
},
HTTPStatus.OK,
lambda x: [
x[3].duration_nano,
x[3].name,
x[3].response_status_code,
x[3].service_name,
x[3].span_id,
format_timestamp(x[3].timestamp),
x[3].trace_id,
@@ -553,7 +553,7 @@ def test_traces_list(
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 4: select attribute.timestamp with empty order by
# This doesn't return any data because of where_clause using aliased timestamp
# This returns the one span which has attribute.timestamp
pytest.param(
{
"type": "builder_query",
@@ -567,7 +567,11 @@ def test_traces_list(
},
},
HTTPStatus.OK,
lambda x: [], # type: Callable[[List[Traces]], List[Any]]
lambda x: [
x[0].span_id,
format_timestamp(x[0].timestamp),
x[0].trace_id,
], # type: Callable[[List[Traces]], List[Any]]
),
# Case 5: select timestamp with timestamp order by
pytest.param(
@@ -706,6 +710,112 @@ def test_traces_list_with_corrupt_data(
assert data[key] == value
@pytest.mark.parametrize(
"select_fields,status_code,expected_keys",
[
pytest.param(
[],
HTTPStatus.OK,
[
# all intrinsic column
"timestamp",
"trace_id",
"span_id",
"trace_state",
"parent_span_id",
"flags",
"name",
"kind",
"kind_string",
"duration_nano",
"status_code",
"status_message",
"status_code_string",
"events",
"links",
# all calculated columns
"response_status_code",
"external_http_url",
"http_url",
"external_http_method",
"http_method",
"http_host",
"db_name",
"db_operation",
"has_error",
"is_remote",
# all contextual columns (merged in response layer)
"attributes",
"resource",
],
),
pytest.param(
[
{"name": "service.name"},
],
HTTPStatus.OK,
["timestamp", "trace_id", "span_id", "service.name"],
),
],
)
def test_traces_list_with_select_fields(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[List[Traces]], None],
select_fields: List[dict],
status_code: HTTPStatus,
expected_keys: List[str],
) -> None:
"""
Setup:
Insert 4 traces with different attributes.
Tests:
1. Empty select fields should return all the fields.
2. Non empty select field should return the select field along with timestamp, trace_id and span_id.
"""
traces = (
generate_traces_with_corrupt_metadata()
) # using this as the data doesn't matter
insert_traces(traces)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
payload = {
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"selectFields": select_fields,
"order": [{"key": {"name": "timestamp"}, "direction": "desc"}],
"limit": 1,
},
}
response = make_query_request(
signoz,
token,
start_ms=int(
(datetime.now(tz=timezone.utc) - timedelta(minutes=5)).timestamp() * 1000
),
end_ms=int(datetime.now(tz=timezone.utc).timestamp() * 1000),
request_type="raw",
queries=[payload],
)
assert response.status_code == status_code
if response.status_code == HTTPStatus.OK:
data = response.json()
assert len(data["data"]["data"]["results"][0]["rows"][0]["data"].keys()) == len(
expected_keys
)
assert set(data["data"]["data"]["results"][0]["rows"][0]["data"].keys()) == set(
expected_keys
)
@pytest.mark.parametrize(
"order_by,aggregation_alias,expected_status",
[
@@ -2131,9 +2241,9 @@ def test_traces_list_filter_by_trace_id(
narrow_start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
narrow_rows = _query(narrow_start_ms, now_ms)
assert (
len(narrow_rows) == 1
), f"Expected 1 span for trace_id filter (narrow window), got {len(narrow_rows)}"
assert len(narrow_rows) == 1, (
f"Expected 1 span for trace_id filter (narrow window), got {len(narrow_rows)}"
)
assert narrow_rows[0]["data"]["span_id"] == span_id_root
assert narrow_rows[0]["data"]["trace_id"] == target_trace_id