Compare commits

..

39 Commits

Author SHA1 Message Date
Piyush Singariya
9176ef0589 Merge branch 'main' into feat/json-qb-tests 2026-04-10 19:59:46 +05:30
Piyush Singariya
5c2a338189 test: updated validation 2026-04-10 19:59:29 +05:30
Piyush Singariya
704bab23cf Merge branch 'main' into feat/json-qb-tests 2026-04-09 18:04:25 +05:30
Piyush Singariya
371da26b3c fix: test 2026-04-09 18:03:57 +05:30
Piyush Singariya
97fbfbdc13 fix: type ambiguity 2026-04-09 13:58:13 +05:30
Piyush Singariya
4b112988ef Merge branch 'main' into feat/json-qb-tests 2026-04-09 12:19:26 +05:30
Piyush Singariya
a47ecf3907 test: with higher migrator version 2026-04-09 12:16:31 +05:30
Piyush Singariya
e4a78cf556 fix: dynamically change insert stmt for body_v2 availability 2026-04-08 22:00:05 +05:30
Piyush Singariya
b6adecc294 fix: better validations 2026-04-08 19:29:28 +05:30
Piyush Singariya
40333a5fee Merge branch 'main' into feat/json-qb-tests 2026-04-08 19:08:34 +05:30
Piyush Singariya
4af6a9abae Merge branch 'main' into feat/json-qb-tests 2026-04-07 21:49:21 +05:30
Piyush Singariya
55e892dad3 fix: body tests ready 2026-04-07 16:22:26 +05:30
Piyush Singariya
181116308f fix: logs.py 2026-04-07 15:56:21 +05:30
Piyush Singariya
eaa678910b fix: better tests 2026-04-07 14:49:52 +05:30
Piyush Singariya
e994caeb02 chore: import tests from older pr 2026-04-07 13:43:16 +05:30
Piyush Singariya
10840f8495 ci: lint changes 2026-04-07 12:31:22 +05:30
Piyush Singariya
1fcd3adfc8 Merge branch 'main' into fix/array-json 2026-04-07 12:29:35 +05:30
Piyush Singariya
3e14b26b00 fix: negative operator check 2026-04-07 12:26:44 +05:30
Piyush Singariya
b30bfa6371 fix: better review for test file 2026-04-02 12:53:44 +05:30
Piyush Singariya
e7f4a04b36 Merge branch 'main' into fix/array-json 2026-04-01 15:44:01 +05:30
Piyush Singariya
0687634da3 fix: stringified integer value input 2026-04-01 15:25:35 +05:30
Piyush Singariya
7e7732243e fix: dynamic array tests 2026-04-01 12:54:26 +05:30
Piyush Singariya
2f952e402f feat: change filtering of dynamic arrays 2026-04-01 12:09:32 +05:30
Piyush Singariya
a12febca4a fix: array json element comparison 2026-04-01 10:34:55 +05:30
Piyush Singariya
cb71c9c3f7 Merge branch 'main' into fix/array-json 2026-03-31 15:42:09 +05:30
Piyush Singariya
1cd4ce6509 Merge branch 'main' into fix/array-json 2026-03-31 14:55:36 +05:30
Piyush Singariya
9299c8ab18 fix: indexed unit tests 2026-03-30 15:47:47 +05:30
Piyush Singariya
24749de269 fix: comment 2026-03-30 15:16:28 +05:30
Piyush Singariya
39098ec3f4 fix: unit tests 2026-03-30 15:12:17 +05:30
Piyush Singariya
fe554f5c94 fix: remove not used paths from testdata 2026-03-30 14:24:48 +05:30
Piyush Singariya
8a60a041a6 fix: unit tests 2026-03-30 14:14:49 +05:30
Piyush Singariya
541f19c34a fix: array type filtering from dynamic arrays 2026-03-30 12:59:31 +05:30
Piyush Singariya
010db03d6e fix: indexed tests passing 2026-03-30 12:24:26 +05:30
Piyush Singariya
5408acbd8c fix: primitive conditions working 2026-03-30 12:01:35 +05:30
Piyush Singariya
0de6c85f81 feat: align negative operators to include other logs 2026-03-28 10:30:11 +05:30
Piyush Singariya
69ec24fa05 test: fix unit tests 2026-03-27 15:12:49 +05:30
Piyush Singariya
539d732b65 fix: contextual path index usage 2026-03-27 14:44:51 +05:30
Piyush Singariya
843d5fb199 Merge branch 'main' into feat/json-index 2026-03-27 14:17:52 +05:30
Piyush Singariya
fabdfb8cc1 feat: enable JSON Path index 2026-03-27 14:07:37 +05:30
23 changed files with 2216 additions and 302 deletions

View File

@@ -52,6 +52,7 @@ jobs:
- ingestionkeys
- rootuser
- serviceaccount
- querier_json_body
sqlstore-provider:
- postgres
- sqlite
@@ -61,7 +62,7 @@ jobs:
- 25.5.6
- 25.12.5
schema-migrator-version:
- v0.142.0
- v0.144.2
postgres-version:
- 15
if: |

View File

@@ -14,7 +14,7 @@ import (
sdkmetric "go.opentelemetry.io/otel/metric"
sdkmetricnoop "go.opentelemetry.io/otel/metric/noop"
sdkresource "go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
semconv "go.opentelemetry.io/otel/semconv/v1.39.0"
sdktrace "go.opentelemetry.io/otel/trace"
)

View File

@@ -207,16 +207,23 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
indexes := []telemetrytypes.JSONDataTypeIndex{}
fieldContextsSeen := map[telemetrytypes.FieldContext]bool{}
dataTypesSeen := map[telemetrytypes.FieldDataType]bool{}
jsonTypesSeen := map[string]*telemetrytypes.JSONDataType{}
for _, matchingKey := range matchingKeys {
materialized = materialized && matchingKey.Materialized
fieldContextsSeen[matchingKey.FieldContext] = true
dataTypesSeen[matchingKey.FieldDataType] = true
if matchingKey.JSONDataType != nil {
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
}
indexes = append(indexes, matchingKey.Indexes...)
}
for _, matchingKey := range contextPrefixedMatchingKeys {
materialized = materialized && matchingKey.Materialized
fieldContextsSeen[matchingKey.FieldContext] = true
dataTypesSeen[matchingKey.FieldDataType] = true
if matchingKey.JSONDataType != nil {
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
}
indexes = append(indexes, matchingKey.Indexes...)
}
key.Materialized = materialized
@@ -241,6 +248,15 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
break
}
}
if len(jsonTypesSeen) == 1 && key.JSONDataType == nil {
// all matching keys have same JSON data type, use it
for _, jt := range jsonTypesSeen {
actions = append(actions, fmt.Sprintf("Adjusting key %s to have JSON data type %s", key, jt.StringValue()))
key.JSONDataType = jt
break
}
}
}
return actions

View File

@@ -276,12 +276,11 @@ func (m *fieldMapper) FieldFor(ctx context.Context, tsStart, tsEnd uint64, key *
continue
}
if key.FieldDataType == telemetrytypes.FieldDataTypeUnspecified {
if key.JSONDataType == nil {
return "", qbtypes.ErrColumnNotFound
}
jdt := key.GetJSONDataType()
if key.KeyNameContainsArray() && !jdt.IsArray {
if key.KeyNameContainsArray() && !key.JSONDataType.IsArray {
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "FieldFor not supported for nested fields; only supported for flat paths (e.g. body.status.detail) and paths of Array type: %s(%s)", key.Name, key.FieldDataType)
}

View File

@@ -220,7 +220,7 @@ func TestJSONStmtBuilder_PrimitivePaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((LOWER(toString(dynamicElement(body_v2.`user.age`, 'Int64'))) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')) OR ((LOWER(dynamicElement(body_v2.`user.age`, 'String')) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%25%", "%25%", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64 name=user.age,context=body,datatype=string]."},
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64,jsondatatype=Int64 name=user.age,context=body,datatype=string,jsondatatype=String]."},
},
},
{
@@ -414,7 +414,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -441,7 +441,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%true%", true, "%true%", true, "%true%", true, "%true%", true, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -455,7 +455,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
},
expected: TestExpected{
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
},
expectedErr: nil,
},
@@ -549,7 +549,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "((NOT arrayExists(`body_v2.education`-> toFloat64OrNull(dynamicElement(`body_v2.education`.`type`, 'String')) = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> dynamicElement(`body_v2.education`.`type`, 'Int64') = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
Args: []any{uint64(1747945619), uint64(1747983448), int64(10001), int64(10001), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string name=education[].type,context=body,datatype=int64]."},
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string,jsondatatype=String name=education[].type,context=body,datatype=int64,jsondatatype=Int64]."},
},
},
{
@@ -576,7 +576,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -585,7 +585,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "passed", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -594,7 +594,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -603,7 +603,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toFloat64(x) IN (?, ?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) IN (?, ?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.99, "1.65", "1.99", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -612,7 +612,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "((NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", float64(1.65), "%1.65%", float64(1.65), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
},
},
{
@@ -622,7 +622,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
WhereClause: "(has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?) OR has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?))",
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic].",
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)].",
},
},
},
@@ -702,7 +702,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
expected: TestExpected{
WhereClause: "(((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')) OR ((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(x) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))')) OR arrayExists(x -> toFloat64OrNull(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')))",
Args: []any{uint64(1747945619), uint64(1747983448), "%4%", float64(4), "%4%", float64(4), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64 name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string]."},
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64,jsondatatype=Array(Nullable(Int64)) name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string,jsondatatype=Array(Nullable(String))]."},
},
},
{
@@ -946,16 +946,16 @@ func buildTestTelemetryMetadataStore(t *testing.T, addIndexes bool) *telemetryty
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.SetStaticFields(IntrinsicFields)
types, _ := telemetrytypes.TestJSONTypeSet()
for path, fieldDataTypes := range types {
for _, fdt := range fieldDataTypes {
for path, jsonTypes := range types {
for _, jsonType := range jsonTypes {
key := &telemetrytypes.TelemetryFieldKey{
Name: path,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
FieldDataType: fdt,
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[jsonType],
JSONDataType: &jsonType,
}
if addIndexes {
jsonType := telemetrytypes.MappingFieldDataTypeToJSONDataType[fdt]
idx := slices.IndexFunc(telemetrytypes.TestIndexedPaths, func(entry telemetrytypes.TestIndexedPathEntry) bool {
return entry.Path == path && entry.Type == jsonType
})

View File

@@ -875,6 +875,7 @@ func TestAdjustKey(t *testing.T) {
require.Equal(t, c.expectedKey.FieldContext, key.FieldContext, "field context should match")
require.Equal(t, c.expectedKey.FieldDataType, key.FieldDataType, "field data type should match")
require.Equal(t, c.expectedKey.Materialized, key.Materialized, "materialized should match")
require.Equal(t, c.expectedKey.JSONDataType, key.JSONDataType, "json data type should match")
require.Equal(t, c.expectedKey.Indexes, key.Indexes, "json exists should match")
})
}

View File

@@ -21,72 +21,133 @@ import (
)
var (
defaultPathLimit = 100 // Default limit to prevent full table scans
CodeUnknownJSONDataType = errors.MustNewCode("unknown_json_data_type")
CodeFailLoadPromotedPaths = errors.MustNewCode("fail_load_promoted_paths")
CodeFailCheckPathPromoted = errors.MustNewCode("fail_check_path_promoted")
CodeFailIterateBodyJSONKeys = errors.MustNewCode("fail_iterate_body_json_keys")
CodeFailExtractBodyJSONKeys = errors.MustNewCode("fail_extract_body_json_keys")
CodeFailLoadLogsJSONIndexes = errors.MustNewCode("fail_load_logs_json_indexes")
CodeFailListJSONValues = errors.MustNewCode("fail_list_json_values")
CodeFailScanJSONValue = errors.MustNewCode("fail_scan_json_value")
CodeFailScanVariant = errors.MustNewCode("fail_scan_variant")
CodeFailBuildJSONPathsQuery = errors.MustNewCode("fail_build_json_paths_query")
CodeNoPathsToQueryIndexes = errors.MustNewCode("no_paths_to_query_indexes_provided")
CodeFailedToPrepareBatch = errors.MustNewCode("failed_to_prepare_batch_promoted_paths")
CodeFailedToSendBatch = errors.MustNewCode("failed_to_send_batch_promoted_paths")
CodeFailedToAppendPath = errors.MustNewCode("failed_to_append_path_promoted_paths")
)
// enrichBodyKeys enriches body-context keys with promoted path info, indexes,
// and JSON access plans.
// parentTypeCache contains parent array types (ArrayJSON/ArrayDynamic) pre-fetched in the main UNION query.
func (t *telemetryMetaStore) enrichBodyKeys(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey, parentTypeCache map[string][]telemetrytypes.FieldDataType) error {
if len(keys) == 0 {
return nil
}
// fetchBodyJSONPaths extracts body JSON paths from the path_types table
// This function can be used by both JSONQueryBuilder and metadata extraction
// uniquePathLimit: 0 for no limit, >0 for maximum number of unique paths to return
// - For startup load: set to 10000 to get top 10k unique paths
// - For lookup: set to 0 (no limit needed for single path)
// - For metadata API: set to desired pagination limit
//
// searchOperator: LIKE for pattern matching, EQUAL for exact match.
func (t *telemetryMetaStore) fetchBodyJSONPaths(ctx context.Context,
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, []string, bool, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
instrumentationtypes.TelemetrySignal: telemetrytypes.SignalLogs.StringValue(),
instrumentationtypes.CodeNamespace: "metadata",
instrumentationtypes.CodeFunctionName: "fetchBodyJSONPaths",
})
var filteredKeys []*telemetrytypes.TelemetryFieldKey
for _, key := range keys {
if key.FieldContext == telemetrytypes.FieldContextBody {
filteredKeys = append(filteredKeys, key)
query, args, limit := buildGetBodyJSONPathsQuery(fieldKeySelectors)
rows, err := t.telemetrystore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to extract body JSON keys")
}
defer rows.Close()
fieldKeys := []*telemetrytypes.TelemetryFieldKey{}
paths := []string{}
rowCount := 0
for rows.Next() {
var path string
var typesArray []string // ClickHouse returns array as []string
var lastSeen uint64
err = rows.Scan(&path, &typesArray, &lastSeen)
if err != nil {
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to scan body JSON key row")
}
for _, typ := range typesArray {
mapping, found := telemetrytypes.MappingStringToJSONDataType[typ]
if !found {
t.logger.ErrorContext(ctx, "failed to map type string to JSON data type", slog.String("type", typ), slog.String("path", path))
continue
}
fieldKeys = append(fieldKeys, &telemetrytypes.TelemetryFieldKey{
Name: path,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[mapping],
JSONDataType: &mapping,
})
}
paths = append(paths, path)
rowCount++
}
if rows.Err() != nil {
return nil, nil, false, errors.WrapInternalf(rows.Err(), CodeFailIterateBodyJSONKeys, "error iterating body JSON keys")
}
// collect paths for batch queries
paths := make([]string, 0, len(filteredKeys))
for _, key := range filteredKeys {
paths = append(paths, key.Name)
}
// fetch promoted paths
promoted, err := t.GetPromotedPaths(ctx, paths...)
if err != nil {
return err
}
// fetch JSON path indexes
indexes, err := t.getJSONPathIndexes(ctx, paths...)
if err != nil {
return err
}
// apply promoted/index metadata to keys
for _, key := range filteredKeys {
promotedKey := strings.Split(key.Name, telemetrytypes.ArraySep)[0]
key.Materialized = promoted[promotedKey]
key.Indexes = indexes[key.Name]
}
// build JSON access plans using the pre-fetched parent type cache
return t.buildJSONPlans(ctx, filteredKeys, parentTypeCache)
return fieldKeys, paths, rowCount <= limit, nil
}
// buildJSONPlans builds JSON access plans for the given keys
// using the provided parent type cache (pre-fetched in the main UNION query).
func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetrytypes.TelemetryFieldKey, typeCache map[string][]telemetrytypes.FieldDataType) error {
if len(keys) == 0 {
return nil
func (t *telemetryMetaStore) buildBodyJSONPaths(ctx context.Context,
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
fieldKeys, paths, finished, err := t.fetchBodyJSONPaths(ctx, fieldKeySelectors)
if err != nil {
return nil, false, err
}
columnMeta := t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody]
promoted, err := t.GetPromotedPaths(ctx, paths...)
if err != nil {
return nil, false, err
}
indexes, err := t.getJSONPathIndexes(ctx, paths...)
if err != nil {
return nil, false, err
}
for _, fieldKey := range fieldKeys {
promotedKey := strings.Split(fieldKey.Name, telemetrytypes.ArraySep)[0]
fieldKey.Materialized = promoted[promotedKey]
fieldKey.Indexes = indexes[fieldKey.Name]
}
return fieldKeys, finished, t.buildJSONPlans(ctx, fieldKeys)
}
func (t *telemetryMetaStore) buildJSONPlans(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey) error {
parentSelectors := make([]*telemetrytypes.FieldKeySelector, 0, len(keys))
for _, key := range keys {
if err := key.SetJSONAccessPlan(columnMeta, typeCache); err != nil {
parentSelectors = append(parentSelectors, key.ArrayParentSelectors()...)
}
parentKeys, _, _, err := t.fetchBodyJSONPaths(ctx, parentSelectors)
if err != nil {
return err
}
typeCache := make(map[string][]telemetrytypes.JSONDataType)
for _, key := range parentKeys {
typeCache[key.Name] = append(typeCache[key.Name], *key.JSONDataType)
}
// build plans for keys now
for _, key := range keys {
err = key.SetJSONAccessPlan(t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody], typeCache)
if err != nil {
return err
}
}
@@ -94,6 +155,51 @@ func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetry
return nil
}
func buildGetBodyJSONPathsQuery(fieldKeySelectors []*telemetrytypes.FieldKeySelector) (string, []any, int) {
if len(fieldKeySelectors) == 0 {
return "", nil, defaultPathLimit
}
from := fmt.Sprintf("%s.%s", DBName, PathTypesTableName)
// Build a better query using GROUP BY to deduplicate at database level
// This aggregates all types per path and gets the max last_seen, then applies LIMIT
sb := sqlbuilder.Select(
"path",
"groupArray(DISTINCT type) AS types",
"max(last_seen) AS last_seen",
).From(from)
limit := 0
// Add search filter if provided
orClauses := []string{}
for _, fieldKeySelector := range fieldKeySelectors {
// replace [*] with []
fieldKeySelector.Name = strings.ReplaceAll(fieldKeySelector.Name, telemetrytypes.ArrayAnyIndex, telemetrytypes.ArraySep)
// Extract search text for body JSON keys
keyName := CleanPathPrefixes(fieldKeySelector.Name)
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
orClauses = append(orClauses, sb.Equal("path", keyName))
} else {
// Pattern matching for metadata API (defaults to LIKE behavior for other operators)
orClauses = append(orClauses, sb.ILike("path", fmt.Sprintf("%%%s%%", querybuilder.FormatValueForContains(keyName))))
}
limit += fieldKeySelector.Limit
}
sb.Where(sb.Or(orClauses...))
// Group by path to get unique paths with aggregated types
sb.GroupBy("path")
// Order by max last_seen to get most recent paths first
sb.OrderBy("last_seen DESC")
if limit == 0 {
limit = defaultPathLimit
}
sb.Limit(limit)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
return query, args, limit
}
func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...string) (map[string][]telemetrytypes.JSONDataTypeIndex, error) {
filteredPaths := []string{}
for _, path := range paths {

View File

@@ -7,9 +7,99 @@ import (
"github.com/SigNoz/signoz-otel-collector/constants"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrylogs"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/stretchr/testify/require"
)
func TestBuildGetBodyJSONPathsQuery(t *testing.T) {
testCases := []struct {
name string
fieldKeySelectors []*telemetrytypes.FieldKeySelector
expectedSQL string
expectedArgs []any
expectedLimit int
}{
{
name: "Single search text with EQUAL operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user.name",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"user.name", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Single search text with LIKE operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%user%", 100},
expectedLimit: 100,
},
{
name: "Multiple search texts with EQUAL operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user.name",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
{
Name: "user.age",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ? OR path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"user.name", "user.age", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Multiple search texts with LIKE operator",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "user",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
{
Name: "admin",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?) OR LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%user%", "%admin%", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
{
name: "Search with Contains operator (should default to LIKE)",
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
{
Name: "test",
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
},
},
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
expectedArgs: []any{"%test%", defaultPathLimit},
expectedLimit: defaultPathLimit,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
query, args, limit := buildGetBodyJSONPathsQuery(tc.fieldKeySelectors)
require.Equal(t, tc.expectedSQL, query)
require.Equal(t, tc.expectedArgs, args)
require.Equal(t, tc.expectedLimit, limit)
})
}
}
func TestBuildListLogsJSONIndexesQuery(t *testing.T) {
testCases := []struct {
name string

View File

@@ -368,19 +368,6 @@ func (t *telemetryMetaStore) logsTblStatementToFieldKeys(ctx context.Context) ([
return materialisedKeys, nil
}
// logKeysUnionArm declares one arm of the UNION ALL in getLogsKeys.
// All per-table variance is captured here so the loop body can stay uniform.
type logKeysUnionArm struct {
shouldQuery bool
fieldContext telemetrytypes.FieldContext
table string
dataTypeColumn string // column used in WHERE/GROUP BY
dataTypeSelectExpr string // expression used in SELECT (may wrap with lower())
addBaseFilters func(sb *sqlbuilder.SelectBuilder) // mandatory WHERE filters (e.g., signal, field_context)
encodeDataType func(telemetrytypes.FieldDataType) string // how to render a FieldDataType in WHERE values
extraOrBranch func(sb *sqlbuilder.SelectBuilder) string // optional extra OR branch (e.g., body parent-types)
}
// getLogsKeys returns the keys from the spans that match the field selection criteria.
func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
@@ -410,149 +397,90 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
// tables to query based on field selectors
queryAttributeTable := false
queryResourceTable := false
queryBodyTable := false
for _, selector := range fieldKeySelectors {
if selector.FieldContext == telemetrytypes.FieldContextUnspecified {
// unspecified context, query all tables
// unspecified context, query both tables
queryAttributeTable = true
queryResourceTable = true
queryBodyTable = true
break
} else if selector.FieldContext == telemetrytypes.FieldContextAttribute {
queryAttributeTable = true
} else if selector.FieldContext == telemetrytypes.FieldContextResource {
queryResourceTable = true
} else if selector.FieldContext == telemetrytypes.FieldContextBody && querybuilder.BodyJSONQueryEnabled {
queryBodyTable = true
}
}
// body keys are gated behind the feature flag
queryBodyTable = queryBodyTable && querybuilder.BodyJSONQueryEnabled
// pre-compute parent array path names from body selectors for JSON plan building;
// these will be fetched as a separate UNION arm filtered to ArrayJSON/ArrayDynamic only
parentPaths := make(map[string]bool)
if queryBodyTable {
for _, sel := range fieldKeySelectors {
if sel.FieldContext != telemetrytypes.FieldContextBody &&
sel.FieldContext != telemetrytypes.FieldContextUnspecified {
continue
}
if !strings.Contains(sel.Name, telemetrytypes.ArraySep) {
continue
}
key := &telemetrytypes.TelemetryFieldKey{
Name: sel.Name,
Signal: telemetrytypes.SignalLogs,
FieldContext: telemetrytypes.FieldContextBody,
}
for _, ps := range key.ArrayParentSelectors() {
parentPaths[ps.Name] = true
}
}
tablesToQuery := []struct {
fieldContext telemetrytypes.FieldContext
shouldQuery bool
}{
{telemetrytypes.FieldContextAttribute, queryAttributeTable},
{telemetrytypes.FieldContextResource, queryResourceTable},
}
// Each UNION arm differs only in: table, data-type column name and SELECT
// expression (lower-wrapped for historical mixed-case in attr/resource),
// base WHERE filters, the per-selector data-type encoding, and (for body)
// an extra OR branch that fetches parent array types for JSON plan building.
// All other logic is shared by the loop below.
tablesToQuery := []logKeysUnionArm{
{
shouldQuery: queryAttributeTable,
fieldContext: telemetrytypes.FieldContextAttribute,
table: t.logsDBName + "." + t.logAttributeKeysTblName,
dataTypeColumn: "datatype",
dataTypeSelectExpr: "lower(datatype)",
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
},
{
shouldQuery: queryResourceTable,
fieldContext: telemetrytypes.FieldContextResource,
table: t.logsDBName + "." + t.logResourceKeysTblName,
dataTypeColumn: "datatype",
dataTypeSelectExpr: "lower(datatype)",
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
},
{
shouldQuery: queryBodyTable,
fieldContext: telemetrytypes.FieldContextBody,
table: fmt.Sprintf("%s.%s", DBName, FieldKeysTable),
dataTypeColumn: "field_data_type",
dataTypeSelectExpr: "field_data_type",
addBaseFilters: func(sb *sqlbuilder.SelectBuilder) {
sb.Where(sb.E("signal", telemetrytypes.SignalLogs.StringValue()))
sb.Where(sb.E("field_context", telemetrytypes.FieldContextBody.StringValue()))
},
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.StringValue() },
extraOrBranch: func(sb *sqlbuilder.SelectBuilder) string {
if len(parentPaths) == 0 {
return ""
}
names := make([]any, 0, len(parentPaths))
for n := range parentPaths {
names = append(names, n)
}
return sb.And(
sb.In("name", names...),
sb.In("field_data_type",
telemetrytypes.FieldDataTypeArrayDynamic.StringValue(),
telemetrytypes.FieldDataTypeArrayJSON.StringValue(),
),
)
},
},
}
for _, arm := range tablesToQuery {
if !arm.shouldQuery {
for _, table := range tablesToQuery {
if !table.shouldQuery {
continue
}
fieldContext := table.fieldContext
// table name based on field context
var tblName string
if fieldContext == telemetrytypes.FieldContextAttribute {
tblName = t.logsDBName + "." + t.logAttributeKeysTblName
} else {
tblName = t.logsDBName + "." + t.logResourceKeysTblName
}
sb := sqlbuilder.Select(
"name AS tag_key",
fmt.Sprintf("'%s' AS tag_type", arm.fieldContext.TagType()),
arm.dataTypeSelectExpr+" AS tag_data_type",
fmt.Sprintf("%d AS priority", getPriorityForContext(arm.fieldContext)),
).From(arm.table)
fmt.Sprintf("'%s' AS tag_type", fieldContext.TagType()),
"lower(datatype) AS tag_data_type", // in logs, we had some historical data with capital and small case
fmt.Sprintf(`%d AS priority`, getPriorityForContext(fieldContext)),
).From(tblName)
arm.addBaseFilters(sb)
var limit int
conds := []string{}
branches := []string{}
for _, sel := range fieldKeySelectors {
if sel.FieldContext != telemetrytypes.FieldContextUnspecified && sel.FieldContext != arm.fieldContext {
for _, fieldKeySelector := range fieldKeySelectors {
// Include this selector if:
// 1. It has unspecified context (matches all tables)
// 2. Its context matches the current table's context
if fieldKeySelector.FieldContext != telemetrytypes.FieldContextUnspecified &&
fieldKeySelector.FieldContext != fieldContext {
continue
}
parts := []string{}
if sel.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
parts = append(parts, sb.E("name", sel.Name))
// key part of the selector
fieldKeyConds := []string{}
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
fieldKeyConds = append(fieldKeyConds, sb.E("name", fieldKeySelector.Name))
} else {
parts = append(parts, sb.ILike("name", "%"+escapeForLike(sel.Name)+"%"))
fieldKeyConds = append(fieldKeyConds, sb.ILike("name", "%"+escapeForLike(fieldKeySelector.Name)+"%"))
}
if sel.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
parts = append(parts, sb.E(arm.dataTypeColumn, arm.encodeDataType(sel.FieldDataType)))
// now look at the field data type
if fieldKeySelector.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
fieldKeyConds = append(fieldKeyConds, sb.E("datatype", fieldKeySelector.FieldDataType.TagDataType()))
}
if len(parts) > 0 {
branches = append(branches, sb.And(parts...))
if len(fieldKeyConds) > 0 {
conds = append(conds, sb.And(fieldKeyConds...))
}
limit += fieldKeySelector.Limit
}
if extra := arm.extraOrBranch(sb); extra != "" {
branches = append(branches, extra)
if len(conds) > 0 {
sb.Where(sb.Or(conds...))
}
if len(branches) > 0 {
sb.Where(sb.Or(branches...))
sb.GroupBy("name", "datatype")
if limit == 0 {
limit = 1000
}
sb.GroupBy("name", arm.dataTypeColumn)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
queries = append(queries, query)
allArgs = append(allArgs, args...)
@@ -589,7 +517,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
defer rows.Close()
keys := []*telemetrytypes.TelemetryFieldKey{}
parentTypeCache := make(map[string][]telemetrytypes.FieldDataType)
rowCount := 0
searchTexts := []string{}
@@ -613,17 +540,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
if err != nil {
return nil, false, errors.Wrap(err, errors.TypeInternal, errors.CodeInternal, ErrFailedToGetLogsKeys.Error())
}
// body keys with ArrayJSON/ArrayDynamic types are internal container types
// used only for JSON access plan building; route to parentTypeCache, not to results
switch fieldDataType {
case telemetrytypes.FieldDataTypeArrayJSON, telemetrytypes.FieldDataTypeArrayDynamic:
if fieldContext == telemetrytypes.FieldContextBody && parentPaths[name] {
parentTypeCache[name] = append(parentTypeCache[name], fieldDataType)
continue
}
}
key, ok := mapOfKeys[name+";"+fieldContext.StringValue()+";"+fieldDataType.StringValue()]
// if there is no materialised column, create a key with the field context and data type
@@ -677,11 +593,13 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
}
}
// enrich body keys with promoted paths, indexes, and JSON access plans
if querybuilder.BodyJSONQueryEnabled {
if err := t.enrichBodyKeys(ctx, keys, parentTypeCache); err != nil {
t.logger.ErrorContext(ctx, "failed to enrich body JSON keys", errors.Attr(err))
bodyJSONPaths, finished, err := t.buildBodyJSONPaths(ctx, fieldKeySelectors) // LIKE for pattern matching
if err != nil {
t.logger.ErrorContext(ctx, "failed to extract body JSON paths", errors.Attr(err))
}
keys = append(keys, bodyJSONPaths...)
complete = complete && finished
}
if _, err := t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {

View File

@@ -1,11 +1,13 @@
package telemetrymetadata
import otelcollectorconst "github.com/SigNoz/signoz-otel-collector/constants"
const (
DBName = "signoz_metadata"
AttributesMetadataTableName = "distributed_attributes_metadata"
AttributesMetadataLocalTableName = "attributes_metadata"
ColumnEvolutionMetadataTableName = "distributed_column_evolution_metadata"
FieldKeysTable = "distributed_field_keys"
PathTypesTableName = otelcollectorconst.DistributedPathTypesTable
// Column Evolution table stores promoted paths as (signal, column_name, field_context, field_name); see signoz-otel-collector metadata_migrations.
PromotedPathsTableName = "distributed_column_evolution_metadata"
SkipIndexTableName = "system.data_skipping_indices"

View File

@@ -37,6 +37,7 @@ type TelemetryFieldKey struct {
FieldContext FieldContext `json:"fieldContext,omitzero"`
FieldDataType FieldDataType `json:"fieldDataType,omitzero"`
JSONDataType *JSONDataType `json:"-"`
JSONPlan JSONAccessPlan `json:"-"`
Indexes []JSONDataTypeIndex `json:"-"`
Materialized bool `json:"-"` // refers to promoted in case of body.... fields
@@ -79,12 +80,6 @@ func (f *TelemetryFieldKey) ArrayParentSelectors() []*FieldKeySelector {
return selectors
}
// GetJSONDataType derives the JSONDataType from FieldDataType.
// Callers should check FieldDataType != FieldDataTypeUnspecified before calling.
func (f *TelemetryFieldKey) GetJSONDataType() JSONDataType {
return MappingFieldDataTypeToJSONDataType[f.FieldDataType]
}
func (f TelemetryFieldKey) String() string {
var sb strings.Builder
fmt.Fprintf(&sb, "name=%s", f.Name)
@@ -97,6 +92,9 @@ func (f TelemetryFieldKey) String() string {
if f.Materialized {
sb.WriteString(",materialized=true")
}
if f.JSONDataType != nil {
fmt.Fprintf(&sb, ",jsondatatype=%s", f.JSONDataType.StringValue())
}
if len(f.Indexes) > 0 {
sb.WriteString(",indexes=[")
for i, index := range f.Indexes {
@@ -119,6 +117,7 @@ func (f TelemetryFieldKey) Text() string {
func (f *TelemetryFieldKey) OverrideMetadataFrom(src *TelemetryFieldKey) {
f.FieldContext = src.FieldContext
f.FieldDataType = src.FieldDataType
f.JSONDataType = src.JSONDataType
f.Indexes = src.Indexes
f.Materialized = src.Materialized
f.JSONPlan = src.JSONPlan

View File

@@ -31,7 +31,7 @@ var (
FieldDataTypeArrayInt64 = FieldDataType{valuer.NewString("[]int64")}
FieldDataTypeArrayNumber = FieldDataType{valuer.NewString("[]number")}
FieldDataTypeArrayJSON = FieldDataType{valuer.NewString("[]object")}
FieldDataTypeArrayObject = FieldDataType{valuer.NewString("[]object")}
FieldDataTypeArrayDynamic = FieldDataType{valuer.NewString("[]dynamic")}
// Map string representations to FieldDataType values
@@ -72,8 +72,6 @@ var (
"[]float64": FieldDataTypeArrayFloat64,
"[]number": FieldDataTypeArrayNumber,
"[]bool": FieldDataTypeArrayBool,
"[]json": FieldDataTypeArrayJSON,
"[]dynamic": FieldDataTypeArrayDynamic,
// c-style array types
"string[]": FieldDataTypeArrayString,
@@ -81,8 +79,6 @@ var (
"float64[]": FieldDataTypeArrayFloat64,
"number[]": FieldDataTypeArrayNumber,
"bool[]": FieldDataTypeArrayBool,
"json[]": FieldDataTypeArrayJSON,
"dynamic[]": FieldDataTypeArrayDynamic,
}
fieldDataTypeToCHDataType = map[FieldDataType]string{

View File

@@ -43,7 +43,7 @@ type JSONAccessNode struct {
isRoot bool // marked true for only body_v2 and body_promoted
// Precomputed type information (single source of truth)
AvailableTypes []FieldDataType
AvailableTypes []JSONDataType
// Array type branches (Array(JSON) vs Array(Dynamic))
Branches map[JSONAccessBranchType]*JSONAccessNode
@@ -106,7 +106,7 @@ type planBuilder struct {
paths []string // cumulative paths for type cache lookups
segments []string // individual path segments for node names
isPromoted bool
typeCache map[string][]FieldDataType
typeCache map[string][]JSONDataType
}
// buildPlan recursively builds the path plan tree.
@@ -155,14 +155,14 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
MaxDynamicPaths: maxPaths,
}
hasJSON := slices.Contains(node.AvailableTypes, FieldDataTypeArrayJSON)
hasDynamic := slices.Contains(node.AvailableTypes, FieldDataTypeArrayDynamic)
hasJSON := slices.Contains(node.AvailableTypes, ArrayJSON)
hasDynamic := slices.Contains(node.AvailableTypes, ArrayDynamic)
// Configure terminal if this is the last part
if isTerminal {
node.TerminalConfig = &TerminalConfig{
Key: pb.key,
ElemType: pb.key.GetJSONDataType(),
ElemType: *pb.key.JSONDataType,
}
} else {
var err error
@@ -185,7 +185,7 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
// buildJSONAccessPlan builds a tree structure representing the complete JSON path traversal
// that precomputes all possible branches and their types.
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]FieldDataType,
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]JSONDataType,
) error {
// if path is empty, return nil
if key.Name == "" {

View File

@@ -19,11 +19,11 @@ const (
// ============================================================================
// makeKey creates a TelemetryFieldKey for testing.
func makeKey(name string, dataType FieldDataType, materialized bool) *TelemetryFieldKey {
func makeKey(name string, dataType JSONDataType, materialized bool) *TelemetryFieldKey {
return &TelemetryFieldKey{
Name: name,
FieldDataType: dataType,
Materialized: materialized,
Name: name,
JSONDataType: &dataType,
Materialized: materialized,
}
}
@@ -242,7 +242,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
}{
{
name: "Simple path not promoted",
key: makeKey("user.name", FieldDataTypeString, false),
key: makeKey("user.name", String, false),
expectedYAML: fmt.Sprintf(`
- name: user.name
column: %s
@@ -255,7 +255,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
},
{
name: "Simple path promoted",
key: makeKey("user.name", FieldDataTypeString, true),
key: makeKey("user.name", String, true),
expectedYAML: fmt.Sprintf(`
- name: user.name
column: %s
@@ -276,7 +276,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
},
{
name: "Empty path returns error",
key: makeKey("", FieldDataTypeString, false),
key: makeKey("", String, false),
expectErr: true,
expectedYAML: "",
},
@@ -431,7 +431,7 @@ func TestPlanJSON_ArrayPaths(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
key := makeKey(tt.path, FieldDataTypeString, false)
key := makeKey(tt.path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -450,7 +450,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
path := "education[].awards[].type"
t.Run("Non-promoted plan", func(t *testing.T) {
key := makeKey(path, FieldDataTypeString, false)
key := makeKey(path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -493,7 +493,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
})
t.Run("Promoted plan", func(t *testing.T) {
key := makeKey(path, FieldDataTypeString, true)
key := makeKey(path, String, true)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,
@@ -666,12 +666,12 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Choose key type based on path; operator does not affect the tree shape asserted here.
keyType := FieldDataTypeString
keyType := String
switch tt.path {
case "education":
keyType = FieldDataTypeArrayJSON
keyType = ArrayJSON
case "education[].type":
keyType = FieldDataTypeString
keyType = String
}
key := makeKey(tt.path, keyType, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
@@ -692,7 +692,7 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
func TestPlanJSON_TreeStructure(t *testing.T) {
types, _ := TestJSONTypeSet()
path := "education[].awards[].participated[].team[].branch"
key := makeKey(path, FieldDataTypeString, false)
key := makeKey(path, String, false)
err := key.SetJSONAccessPlan(JSONColumnMetadata{
BaseColumn: bodyV2Column,
PromotedColumn: bodyPromotedColumn,

View File

@@ -46,6 +46,14 @@ var MappingStringToJSONDataType = map[string]JSONDataType{
"Array(JSON)": ArrayJSON,
}
var ScalerTypeToArrayType = map[JSONDataType]JSONDataType{
String: ArrayString,
Int64: ArrayInt64,
Float64: ArrayFloat64,
Bool: ArrayBool,
Dynamic: ArrayDynamic,
}
var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
FieldDataTypeString: String,
FieldDataTypeInt64: Int64,
@@ -55,8 +63,18 @@ var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
FieldDataTypeArrayString: ArrayString,
FieldDataTypeArrayInt64: ArrayInt64,
FieldDataTypeArrayFloat64: ArrayFloat64,
FieldDataTypeArrayNumber: ArrayFloat64,
FieldDataTypeArrayBool: ArrayBool,
FieldDataTypeArrayDynamic: ArrayDynamic,
FieldDataTypeArrayJSON: ArrayJSON,
}
var MappingJSONDataTypeToFieldDataType = map[JSONDataType]FieldDataType{
String: FieldDataTypeString,
Int64: FieldDataTypeInt64,
Float64: FieldDataTypeFloat64,
Bool: FieldDataTypeBool,
ArrayString: FieldDataTypeArrayString,
ArrayInt64: FieldDataTypeArrayInt64,
ArrayFloat64: FieldDataTypeArrayFloat64,
ArrayBool: FieldDataTypeArrayBool,
ArrayDynamic: FieldDataTypeArrayDynamic,
ArrayJSON: FieldDataTypeArrayObject,
}

View File

@@ -4,69 +4,69 @@ package telemetrytypes
// Test JSON Type Set Data Setup
// ============================================================================
// TestJSONTypeSet returns a map of path->field data types for testing.
// TestJSONTypeSet returns a map of path->types for testing.
// This represents the type information available in the test JSON structure.
func TestJSONTypeSet() (map[string][]FieldDataType, MetadataStore) {
types := map[string][]FieldDataType{
func TestJSONTypeSet() (map[string][]JSONDataType, MetadataStore) {
types := map[string][]JSONDataType{
// ── user (primitives) ─────────────────────────────────────────────
"user.name": {FieldDataTypeString},
"user.permissions": {FieldDataTypeArrayString},
"user.age": {FieldDataTypeInt64, FieldDataTypeString}, // Int64/String ambiguity
"user.height": {FieldDataTypeFloat64},
"user.active": {FieldDataTypeBool}, // Bool — not IndexSupported
"user.name": {String},
"user.permissions": {ArrayString},
"user.age": {Int64, String}, // Int64/String ambiguity
"user.height": {Float64},
"user.active": {Bool}, // Bool — not IndexSupported
// Deeper non-array nesting (a.b.c — no array hops)
"user.address.zip": {FieldDataTypeInt64},
"user.address.zip": {Int64},
// ── education[] ───────────────────────────────────────────────────
// Pattern: x[].y
"education": {FieldDataTypeArrayJSON},
"education[].name": {FieldDataTypeString},
"education[].type": {FieldDataTypeString, FieldDataTypeInt64},
"education[].year": {FieldDataTypeInt64},
"education[].scores": {FieldDataTypeArrayInt64},
"education[].parameters": {FieldDataTypeArrayFloat64, FieldDataTypeArrayDynamic},
"education": {ArrayJSON},
"education[].name": {String},
"education[].type": {String, Int64},
"education[].year": {Int64},
"education[].scores": {ArrayInt64},
"education[].parameters": {ArrayFloat64, ArrayDynamic},
// Pattern: x[].y[]
"education[].awards": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
"education[].awards": {ArrayDynamic, ArrayJSON},
// Pattern: x[].y[].z
"education[].awards[].name": {FieldDataTypeString},
"education[].awards[].type": {FieldDataTypeString},
"education[].awards[].semester": {FieldDataTypeInt64},
"education[].awards[].name": {String},
"education[].awards[].type": {String},
"education[].awards[].semester": {Int64},
// Pattern: x[].y[].z[]
"education[].awards[].participated": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
"education[].awards[].participated": {ArrayDynamic, ArrayJSON},
// Pattern: x[].y[].z[].w
"education[].awards[].participated[].members": {FieldDataTypeArrayString},
"education[].awards[].participated[].members": {ArrayString},
// Pattern: x[].y[].z[].w[]
"education[].awards[].participated[].team": {FieldDataTypeArrayJSON},
"education[].awards[].participated[].team": {ArrayJSON},
// Pattern: x[].y[].z[].w[].v
"education[].awards[].participated[].team[].branch": {FieldDataTypeString},
"education[].awards[].participated[].team[].branch": {String},
// ── interests[] ───────────────────────────────────────────────────
"interests": {FieldDataTypeArrayJSON},
"interests[].entities": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions": {FieldDataTypeArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {FieldDataTypeString},
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {FieldDataTypeArrayInt64, FieldDataTypeArrayString},
"http-events": {FieldDataTypeArrayJSON},
"http-events[].request-info.host": {FieldDataTypeString},
"ids": {FieldDataTypeArrayDynamic},
"interests": {ArrayJSON},
"interests[].entities": {ArrayJSON},
"interests[].entities[].reviews": {ArrayJSON},
"interests[].entities[].reviews[].entries": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions": {ArrayJSON},
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {String},
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {ArrayInt64, ArrayString},
"http-events": {ArrayJSON},
"http-events[].request-info.host": {String},
"ids": {ArrayDynamic},
// ── top-level primitives ──────────────────────────────────────────
"message": {FieldDataTypeString},
"http-status": {FieldDataTypeInt64, FieldDataTypeString}, // hyphen in root key, ambiguous
"message": {String},
"http-status": {Int64, String}, // hyphen in root key, ambiguous
// ── top-level nested objects (no array hops) ───────────────────────
"response.time-taken": {FieldDataTypeFloat64}, // hyphen inside nested key
"response.time-taken": {Float64}, // hyphen inside nested key
}
return types, nil

View File

@@ -23,6 +23,7 @@ pytest_plugins = [
"fixtures.notification_channel",
"fixtures.alerts",
"fixtures.cloudintegrations",
"fixtures.jsontypeexporter",
]

View File

@@ -0,0 +1,437 @@
"""
Simpler version of jsontypeexporter for test fixtures.
This exports JSON type metadata to the path_types table by parsing JSON bodies
and extracting all paths with their types, similar to how the real jsontypeexporter works.
"""
import datetime
import json
from abc import ABC
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
Generator,
List,
Optional,
Set,
Union,
)
import numpy as np
import pytest
from fixtures import types
if TYPE_CHECKING:
pass
class JSONPathType(ABC):
"""Represents a JSON path with its type information"""
path: str
type: str
last_seen: np.uint64
def __init__(
self,
path: str,
type: str, # pylint: disable=redefined-builtin
last_seen: Optional[datetime.datetime] = None,
) -> None:
self.path = path
self.type = type
if last_seen is None:
last_seen = datetime.datetime.now()
self.last_seen = np.uint64(int(last_seen.timestamp() * 1e9))
def np_arr(self) -> np.array:
"""Return path type data as numpy array for database insertion"""
return np.array([self.path, self.type, self.last_seen])
# Constants matching jsontypeexporter
ARRAY_SEPARATOR = "[]." # Used in paths like "education[].name"
ARRAY_SUFFIX = "[]" # Used when traversing into array element objects
def _infer_array_type_from_type_strings(types: List[str]) -> Optional[str]:
"""
Infer array type from a list of pre-classified type strings.
Matches jsontypeexporter's inferArrayMask logic (v0.144.2+).
Type strings are: "JSON", "String", "Bool", "Float64", "Int64"
SuperTyping rules (matching Go inferArrayMask):
- JSON alone → Array(JSON)
- JSON + any primitive → Array(Dynamic)
- String alone → Array(Nullable(String)); String + other → Array(Dynamic)
- Float64 wins over Int64 and Bool
- Int64 wins over Bool
- Bool alone → Array(Nullable(Bool))
"""
if len(types) == 0:
return None
unique = set(types)
has_json = "JSON" in unique
# hasPrimitive mirrors Go: (hasJSON && len(unique) > 1) || (!hasJSON && len(unique) > 0)
has_primitive = (has_json and len(unique) > 1) or (not has_json and len(unique) > 0)
if has_json:
if not has_primitive:
return "Array(JSON)"
return "Array(Dynamic)"
# ---- Primitive Type Resolution (Float > Int > Bool) ----
if "String" in unique:
if len(unique) > 1:
return "Array(Dynamic)"
return "Array(Nullable(String))"
if "Float64" in unique:
return "Array(Nullable(Float64))"
if "Int64" in unique:
return "Array(Nullable(Int64))"
if "Bool" in unique:
return "Array(Nullable(Bool))"
return "Array(Dynamic)"
def _infer_array_type(elements: List[Any]) -> Optional[str]:
"""
Infer array type from raw Python list elements.
Classifies each element then delegates to _infer_array_type_from_type_strings.
"""
if len(elements) == 0:
return None
types = []
for elem in elements:
if elem is None:
continue
if isinstance(elem, dict):
types.append("JSON")
elif isinstance(elem, str):
types.append("String")
elif isinstance(elem, bool): # must be before int (bool is subclass of int)
types.append("Bool")
elif isinstance(elem, float):
types.append("Float64")
elif isinstance(elem, int):
types.append("Int64")
return _infer_array_type_from_type_strings(types)
def _python_type_to_clickhouse_type(value: Any) -> str:
"""
Convert Python type to ClickHouse JSON type string.
Maps Python types to ClickHouse JSON data types.
"""
if value is None:
return "String" # Default for null values
if isinstance(value, bool):
return "Bool"
elif isinstance(value, int):
return "Int64"
elif isinstance(value, float):
return "Float64"
elif isinstance(value, str):
return "String"
elif isinstance(value, list):
# Use the sophisticated array type inference
array_type = _infer_array_type(value)
return array_type if array_type else "Array(Dynamic)"
elif isinstance(value, dict):
return "JSON"
else:
return "String" # Default fallback
def _extract_json_paths(
obj: Any,
current_path: str = "",
path_types: Optional[Dict[str, Set[str]]] = None,
level: int = 0,
) -> Dict[str, Set[str]]:
"""
Recursively extract all paths and their types from a JSON object.
Matches jsontypeexporter's analyzePValue logic.
Args:
obj: The JSON object to traverse
current_path: Current path being built (e.g., "user.name")
path_types: Dictionary mapping paths to sets of types found
level: Current nesting level (for depth limiting)
Returns:
Dictionary mapping paths to sets of type strings
"""
if path_types is None:
path_types = {}
if obj is None:
if current_path:
if current_path not in path_types:
path_types[current_path] = set()
path_types[current_path].add("String") # Null defaults to String
return path_types
if isinstance(obj, dict):
# For objects, add the object itself and recurse into keys
if current_path:
if current_path not in path_types:
path_types[current_path] = set()
path_types[current_path].add("JSON")
for key, value in obj.items():
# Build the path for this key
if current_path:
new_path = f"{current_path}.{key}"
else:
new_path = key
# Recurse into the value
_extract_json_paths(value, new_path, path_types, level + 1)
elif isinstance(obj, list):
# Skip empty arrays
if len(obj) == 0:
return path_types
# Collect types from array elements (matching Go: types := make([]pcommon.ValueType, 0, s.Len()))
types = []
for item in obj:
if isinstance(item, dict):
# When traversing into array element objects, use ArraySuffix ([])
# This matches: prefix+ArraySuffix in the Go code
# Example: if current_path is "education", we use "education[]" to traverse into objects
array_prefix = current_path + ARRAY_SUFFIX if current_path else ""
for key, value in item.items():
if array_prefix:
# Use array separator: education[].name
array_path = f"{array_prefix}.{key}"
else:
array_path = key
# Recurse without increasing level (matching Go behavior)
_extract_json_paths(value, array_path, path_types, level)
types.append("JSON")
elif isinstance(item, list):
# Arrays inside arrays are not supported - skip the whole path
# Matching Go: e.logger.Error("arrays inside arrays are not supported!", ...); return nil
return path_types
elif isinstance(item, str):
types.append("String")
elif isinstance(item, bool):
types.append("Bool")
elif isinstance(item, float):
types.append("Float64")
elif isinstance(item, int):
types.append("Int64")
# Infer array type from collected types (matching Go: if mask := inferArrayMask(types); mask != 0)
if len(types) > 0:
array_type = _infer_array_type_from_type_strings(types)
if array_type and current_path:
if current_path not in path_types:
path_types[current_path] = set()
path_types[current_path].add(array_type)
else:
# Primitive value (string, number, bool)
if current_path:
if current_path not in path_types:
path_types[current_path] = set()
obj_type = _python_type_to_clickhouse_type(obj)
path_types[current_path].add(obj_type)
return path_types
def _parse_json_bodies_and_extract_paths(
json_bodies: List[str],
timestamp: Optional[datetime.datetime] = None,
) -> List[JSONPathType]:
"""
Parse JSON bodies and extract all paths with their types.
This mimics the behavior of jsontypeexporter.
Args:
json_bodies: List of JSON body strings to parse
timestamp: Timestamp to use for last_seen (defaults to now)
Returns:
List of JSONPathType objects with all discovered paths and types
"""
if timestamp is None:
timestamp = datetime.datetime.now()
# Aggregate all paths and their types across all JSON bodies
all_path_types: Dict[str, Set[str]] = {}
for json_body in json_bodies:
try:
parsed = json.loads(json_body)
_extract_json_paths(parsed, "", all_path_types, level=0)
except (json.JSONDecodeError, TypeError):
# Skip invalid JSON
continue
# Convert to list of JSONPathType objects
# Each path can have multiple types, so we create one JSONPathType per type
path_type_objects: List[JSONPathType] = []
for path, types_set in all_path_types.items():
for type_str in types_set:
path_type_objects.append(
JSONPathType(path=path, type=type_str, last_seen=timestamp)
)
return path_type_objects
@pytest.fixture(name="export_json_types", scope="function")
def export_json_types(
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest, # To access migrator fixture
) -> Generator[
Callable[[Union[List[JSONPathType], List[str], List[Any]]], None], Any, None
]:
"""
Fixture for exporting JSON type metadata to the path_types table.
This is a simpler version of jsontypeexporter for test fixtures.
The function can accept:
1. List of JSONPathType objects (manual specification)
2. List of JSON body strings (auto-extract paths)
3. List of Logs objects (extract from body_json field)
Usage examples:
# Manual specification
export_json_types([
JSONPathType(path="user.name", type="String"),
JSONPathType(path="user.age", type="Int64"),
])
# Auto-extract from JSON strings
export_json_types([
'{"user": {"name": "alice", "age": 25}}',
'{"user": {"name": "bob", "age": 30}}',
])
# Auto-extract from Logs objects
export_json_types(logs_list)
"""
# Ensure migrator has run to create the table
try:
request.getfixturevalue("migrator")
except Exception:
# If migrator fixture is not available, that's okay - table might already exist
pass
def _export_json_types(
data: Union[
List[JSONPathType], List[str], List[Any]
], # List[Logs] but avoiding circular import
) -> None:
"""
Export JSON type metadata to signoz_metadata.distributed_json_path_types table.
This table stores path and type information for body JSON fields.
"""
path_types: List[JSONPathType] = []
if len(data) == 0:
return
# Determine input type and convert to JSONPathType list
first_item = data[0]
if isinstance(first_item, JSONPathType):
# Already JSONPathType objects
path_types = data # type: ignore
elif isinstance(first_item, str):
# List of JSON strings - parse and extract paths
path_types = _parse_json_bodies_and_extract_paths(data) # type: ignore
else:
# Assume it's a list of Logs objects - extract body_v2
json_bodies: List[str] = []
for log in data: # type: ignore
# Try to get body_v2 attribute
if hasattr(log, "body_v2") and log.body_v2:
json_bodies.append(log.body_v2)
elif hasattr(log, "body") and log.body:
# Fallback to body if body_v2 not available
try:
# Try to parse as JSON
json.loads(log.body)
json_bodies.append(log.body)
except (json.JSONDecodeError, TypeError):
pass
if json_bodies:
path_types = _parse_json_bodies_and_extract_paths(json_bodies)
if len(path_types) == 0:
return
clickhouse.conn.insert(
database="signoz_metadata",
table="distributed_json_path_types",
data=[path_type.np_arr() for path_type in path_types],
column_names=[
"path",
"type",
"last_seen",
],
)
yield _export_json_types
# Cleanup - truncate the local table after tests (following pattern from logs fixture)
clickhouse.conn.query(
f"TRUNCATE TABLE signoz_metadata.json_path_types ON CLUSTER '{clickhouse.env['SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER']}' SYNC"
)
@pytest.fixture(name="export_promoted_paths", scope="function")
def export_promoted_paths(
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest, # To access migrator fixture
) -> Generator[Callable[[List[str]], None], Any, None]:
"""
Fixture for exporting promoted JSON paths to the promoted paths table.
"""
# Ensure migrator has run to create the table
try:
request.getfixturevalue("migrator")
except Exception:
# If migrator fixture is not available, that's okay - table might already exist
pass
def _export_promoted_paths(paths: List[str]) -> None:
if len(paths) == 0:
return
now_ms = int(datetime.datetime.now().timestamp() * 1000)
rows = [(path, now_ms) for path in paths]
clickhouse.conn.insert(
database="signoz_metadata",
table="distributed_json_promoted_paths",
data=rows,
column_names=[
"path",
"created_at",
],
)
yield _export_promoted_paths
clickhouse.conn.query(
f"TRUNCATE TABLE signoz_metadata.json_promoted_paths ON CLUSTER '{clickhouse.env['SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER']}' SYNC"
)

View File

@@ -122,6 +122,8 @@ class Logs(ABC):
resources: dict[str, Any] = {},
attributes: dict[str, Any] = {},
body: str = "default body",
body_v2: Optional[str] = None,
body_promoted: Optional[str] = None,
severity_text: str = "INFO",
trace_id: str = "",
span_id: str = "",
@@ -167,6 +169,33 @@ class Logs(ABC):
# Set body
self.body = body
# Set body_v2 - if body is JSON, parse and stringify it, otherwise use empty string
# ClickHouse accepts String input for JSON column
if body_v2 is not None:
self.body_v2 = body_v2
else:
# Try to parse body as JSON; if successful use it directly,
# otherwise wrap as {"message": body} matching the normalize operator behavior.
try:
json.loads(body)
self.body_v2 = body
except (json.JSONDecodeError, TypeError):
self.body_v2 = json.dumps({"message": body})
# Set body_promoted - must be valid JSON
# Tests will explicitly pass promoted column's content, but we validate it
if body_promoted is not None:
# Validate that it's valid JSON
try:
json.loads(body_promoted)
self.body_promoted = body_promoted
except (json.JSONDecodeError, TypeError):
# If invalid, default to empty JSON object
self.body_promoted = "{}"
else:
# Default to empty JSON object (valid JSON)
self.body_promoted = "{}"
# Process resources and attributes
self.resources_string = {k: str(v) for k, v in resources.items()}
self.resource_json = (
@@ -326,6 +355,8 @@ class Logs(ABC):
self.severity_text,
self.severity_number,
self.body,
self.body_v2,
self.body_promoted,
self.attributes_string,
self.attributes_number,
self.attributes_bool,
@@ -454,31 +485,53 @@ def insert_logs(
data=[resource_key.np_arr() for resource_key in resource_keys],
)
# All columns in insertion order (must match Logs.np_arr() order)
all_column_names = [
"ts_bucket_start",
"resource_fingerprint",
"timestamp",
"observed_timestamp",
"id",
"trace_id",
"span_id",
"trace_flags",
"severity_text",
"severity_number",
"body",
"body_v2",
"body_promoted",
"attributes_string",
"attributes_number",
"attributes_bool",
"resources_string",
"scope_name",
"scope_version",
"scope_string",
"resource",
]
# Check if body_v2 column exists (only present when ENABLE_LOGS_MIGRATIONS_V2 migration has run)
result = clickhouse.conn.query(
"SELECT count() FROM system.columns WHERE database = 'signoz_logs' AND table = 'logs_v2' AND name = 'body_v2'"
)
has_json_body = result.result_rows[0][0] > 0
if has_json_body:
column_names = all_column_names
data = [log.np_arr() for log in logs]
else:
json_body_cols = {"body_v2", "body_promoted"}
keep_indices = [
i for i, c in enumerate(all_column_names) if c not in json_body_cols
]
column_names = [all_column_names[i] for i in keep_indices]
data = [log.np_arr()[keep_indices] for log in logs]
clickhouse.conn.insert(
database="signoz_logs",
table="distributed_logs_v2",
data=[log.np_arr() for log in logs],
column_names=[
"ts_bucket_start",
"resource_fingerprint",
"timestamp",
"observed_timestamp",
"id",
"trace_id",
"span_id",
"trace_flags",
"severity_text",
"severity_number",
"body",
"attributes_string",
"attributes_number",
"attributes_bool",
"resources_string",
"scope_name",
"scope_version",
"scope_string",
"resource",
],
data=data,
column_names=column_names,
)
yield _insert_logs

View File

@@ -1,3 +1,5 @@
from typing import Optional
import docker
import pytest
from testcontainers.core.container import Network
@@ -8,27 +10,32 @@ from fixtures.logger import setup_logger
logger = setup_logger(__name__)
@pytest.fixture(name="migrator", scope="package")
def migrator(
def create_migrator(
network: Network,
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest,
pytestconfig: pytest.Config,
cache_key: str = "migrator",
env_overrides: Optional[dict] = None,
) -> types.Operation:
"""
Package-scoped fixture for running schema migrations.
Factory function for running schema migrations.
Accepts optional env_overrides to customize the migrator environment.
"""
def create() -> None:
version = request.config.getoption("--schema-migrator-version")
client = docker.from_env()
environment = dict(env_overrides) if env_overrides else {}
container = client.containers.run(
image=f"signoz/signoz-schema-migrator:{version}",
command=f"sync --replication=true --cluster-name=cluster --up= --dsn={clickhouse.env["SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN"]}",
detach=True,
auto_remove=False,
network=network.id,
environment=environment,
)
result = container.wait()
@@ -47,6 +54,7 @@ def migrator(
detach=True,
auto_remove=False,
network=network.id,
environment=environment,
)
result = container.wait()
@@ -59,7 +67,7 @@ def migrator(
container.remove()
return types.Operation(name="migrator")
return types.Operation(name=cache_key)
def delete(_: types.Operation) -> None:
pass
@@ -70,9 +78,27 @@ def migrator(
return dev.wrap(
request,
pytestconfig,
"migrator",
cache_key,
lambda: types.Operation(name=""),
create,
delete,
restore,
)
@pytest.fixture(name="migrator", scope="package")
def migrator(
network: Network,
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest,
pytestconfig: pytest.Config,
) -> types.Operation:
"""
Package-scoped fixture for running schema migrations.
"""
return create_migrator(
network=network,
clickhouse=clickhouse,
request=request,
pytestconfig=pytestconfig,
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,70 @@
import pytest
from testcontainers.core.container import Network
from fixtures import types
from fixtures.migrator import create_migrator
from fixtures.signoz import create_signoz
UNSUPPORTED_CLICKHOUSE_VERSIONS = {"25.5.6"}
def pytest_collection_modifyitems(
config: pytest.Config, items: list[pytest.Item]
) -> None:
version = config.getoption("--clickhouse-version")
if version in UNSUPPORTED_CLICKHOUSE_VERSIONS:
skip = pytest.mark.skip(
reason=f"JSON body QB tests require ClickHouse > {version}"
)
for item in items:
item.add_marker(skip)
@pytest.fixture(name="migrator", scope="package")
def migrator_json(
network: Network,
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest,
pytestconfig: pytest.Config,
) -> types.Operation:
"""
Package-scoped migrator with ENABLE_LOGS_MIGRATIONS_V2=1.
"""
return create_migrator(
network=network,
clickhouse=clickhouse,
request=request,
pytestconfig=pytestconfig,
cache_key="migrator-json-body",
env_overrides={
"ENABLE_LOGS_MIGRATIONS_V2": "1",
},
)
@pytest.fixture(name="signoz", scope="package")
def signoz_json_body(
network: Network,
zeus: types.TestContainerDocker,
gateway: types.TestContainerDocker,
sqlstore: types.TestContainerSQL,
clickhouse: types.TestContainerClickhouse,
request: pytest.FixtureRequest,
pytestconfig: pytest.Config,
) -> types.SigNoz:
"""
Package-scoped fixture for SigNoz with BODY_JSON_QUERY_ENABLED=true.
"""
return create_signoz(
network=network,
zeus=zeus,
gateway=gateway,
sqlstore=sqlstore,
clickhouse=clickhouse,
request=request,
pytestconfig=pytestconfig,
cache_key="signoz-json-body",
env_overrides={
"BODY_JSON_QUERY_ENABLED": "true",
},
)