mirror of
https://github.com/SigNoz/signoz.git
synced 2026-04-13 15:40:20 +01:00
Compare commits
39 Commits
chore/json
...
feat/json-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9176ef0589 | ||
|
|
5c2a338189 | ||
|
|
704bab23cf | ||
|
|
371da26b3c | ||
|
|
97fbfbdc13 | ||
|
|
4b112988ef | ||
|
|
a47ecf3907 | ||
|
|
e4a78cf556 | ||
|
|
b6adecc294 | ||
|
|
40333a5fee | ||
|
|
4af6a9abae | ||
|
|
55e892dad3 | ||
|
|
181116308f | ||
|
|
eaa678910b | ||
|
|
e994caeb02 | ||
|
|
10840f8495 | ||
|
|
1fcd3adfc8 | ||
|
|
3e14b26b00 | ||
|
|
b30bfa6371 | ||
|
|
e7f4a04b36 | ||
|
|
0687634da3 | ||
|
|
7e7732243e | ||
|
|
2f952e402f | ||
|
|
a12febca4a | ||
|
|
cb71c9c3f7 | ||
|
|
1cd4ce6509 | ||
|
|
9299c8ab18 | ||
|
|
24749de269 | ||
|
|
39098ec3f4 | ||
|
|
fe554f5c94 | ||
|
|
8a60a041a6 | ||
|
|
541f19c34a | ||
|
|
010db03d6e | ||
|
|
5408acbd8c | ||
|
|
0de6c85f81 | ||
|
|
69ec24fa05 | ||
|
|
539d732b65 | ||
|
|
843d5fb199 | ||
|
|
fabdfb8cc1 |
3
.github/workflows/integrationci.yaml
vendored
3
.github/workflows/integrationci.yaml
vendored
@@ -52,6 +52,7 @@ jobs:
|
||||
- ingestionkeys
|
||||
- rootuser
|
||||
- serviceaccount
|
||||
- querier_json_body
|
||||
sqlstore-provider:
|
||||
- postgres
|
||||
- sqlite
|
||||
@@ -61,7 +62,7 @@ jobs:
|
||||
- 25.5.6
|
||||
- 25.12.5
|
||||
schema-migrator-version:
|
||||
- v0.142.0
|
||||
- v0.144.2
|
||||
postgres-version:
|
||||
- 15
|
||||
if: |
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
sdkmetric "go.opentelemetry.io/otel/metric"
|
||||
sdkmetricnoop "go.opentelemetry.io/otel/metric/noop"
|
||||
sdkresource "go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.39.0"
|
||||
sdktrace "go.opentelemetry.io/otel/trace"
|
||||
)
|
||||
|
||||
|
||||
@@ -207,16 +207,23 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
|
||||
indexes := []telemetrytypes.JSONDataTypeIndex{}
|
||||
fieldContextsSeen := map[telemetrytypes.FieldContext]bool{}
|
||||
dataTypesSeen := map[telemetrytypes.FieldDataType]bool{}
|
||||
jsonTypesSeen := map[string]*telemetrytypes.JSONDataType{}
|
||||
for _, matchingKey := range matchingKeys {
|
||||
materialized = materialized && matchingKey.Materialized
|
||||
fieldContextsSeen[matchingKey.FieldContext] = true
|
||||
dataTypesSeen[matchingKey.FieldDataType] = true
|
||||
if matchingKey.JSONDataType != nil {
|
||||
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
|
||||
}
|
||||
indexes = append(indexes, matchingKey.Indexes...)
|
||||
}
|
||||
for _, matchingKey := range contextPrefixedMatchingKeys {
|
||||
materialized = materialized && matchingKey.Materialized
|
||||
fieldContextsSeen[matchingKey.FieldContext] = true
|
||||
dataTypesSeen[matchingKey.FieldDataType] = true
|
||||
if matchingKey.JSONDataType != nil {
|
||||
jsonTypesSeen[matchingKey.JSONDataType.StringValue()] = matchingKey.JSONDataType
|
||||
}
|
||||
indexes = append(indexes, matchingKey.Indexes...)
|
||||
}
|
||||
key.Materialized = materialized
|
||||
@@ -241,6 +248,15 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if len(jsonTypesSeen) == 1 && key.JSONDataType == nil {
|
||||
// all matching keys have same JSON data type, use it
|
||||
for _, jt := range jsonTypesSeen {
|
||||
actions = append(actions, fmt.Sprintf("Adjusting key %s to have JSON data type %s", key, jt.StringValue()))
|
||||
key.JSONDataType = jt
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return actions
|
||||
|
||||
@@ -276,12 +276,11 @@ func (m *fieldMapper) FieldFor(ctx context.Context, tsStart, tsEnd uint64, key *
|
||||
continue
|
||||
}
|
||||
|
||||
if key.FieldDataType == telemetrytypes.FieldDataTypeUnspecified {
|
||||
if key.JSONDataType == nil {
|
||||
return "", qbtypes.ErrColumnNotFound
|
||||
}
|
||||
|
||||
jdt := key.GetJSONDataType()
|
||||
if key.KeyNameContainsArray() && !jdt.IsArray {
|
||||
if key.KeyNameContainsArray() && !key.JSONDataType.IsArray {
|
||||
return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "FieldFor not supported for nested fields; only supported for flat paths (e.g. body.status.detail) and paths of Array type: %s(%s)", key.Name, key.FieldDataType)
|
||||
}
|
||||
|
||||
|
||||
@@ -220,7 +220,7 @@ func TestJSONStmtBuilder_PrimitivePaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((LOWER(toString(dynamicElement(body_v2.`user.age`, 'Int64'))) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')) OR ((LOWER(dynamicElement(body_v2.`user.age`, 'String')) LIKE LOWER(?)) AND has(JSONAllPaths(body_v2), 'user.age')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%25%", "%25%", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64 name=user.age,context=body,datatype=string]."},
|
||||
Warnings: []string{"Key `user.age` is ambiguous, found 2 different combinations of field context / data type: [name=user.age,context=body,datatype=int64,jsondatatype=Int64 name=user.age,context=body,datatype=string,jsondatatype=String]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -414,7 +414,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
|
||||
},
|
||||
expected: TestExpected{
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
@@ -441,7 +441,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
|
||||
},
|
||||
expected: TestExpected{
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%true%", true, "%true%", true, "%true%", true, "%true%", true, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
@@ -455,7 +455,7 @@ func TestStatementBuilderListQueryBodyPromoted(t *testing.T) {
|
||||
},
|
||||
expected: TestExpected{
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true name=education[].parameters,context=body,datatype=[]dynamic,materialized=true]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,materialized=true,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,materialized=true,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
@@ -549,7 +549,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "((NOT arrayExists(`body_v2.education`-> toFloat64OrNull(dynamicElement(`body_v2.education`.`type`, 'String')) = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> dynamicElement(`body_v2.education`.`type`, 'Int64') = ?, dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), int64(10001), int64(10001), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string name=education[].type,context=body,datatype=int64]."},
|
||||
Warnings: []string{"Key `education[].type` is ambiguous, found 2 different combinations of field context / data type: [name=education[].type,context=body,datatype=string,jsondatatype=String name=education[].type,context=body,datatype=int64,jsondatatype=Int64]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -576,7 +576,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", 1.65, "%1.65%", 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -585,7 +585,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "passed", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -594,7 +594,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toString(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> toString(x) = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%passed%", "passed", "%passed%", "passed", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -603,7 +603,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((arrayExists(`body_v2.education`-> arrayExists(x -> toFloat64(x) IN (?, ?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')) OR ((arrayExists(`body_v2.education`-> arrayExists(x -> toString(x) IN (?, ?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'education')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.99, "1.65", "1.99", "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -612,7 +612,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "((NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND (NOT arrayExists(`body_v2.education`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'))) OR arrayExists(x -> accurateCastOrNull(x, 'Float64') = ?, arrayFilter(x->(dynamicType(x) IN ('String', 'Int64', 'Float64', 'Bool')), dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)')))), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%1.65%", float64(1.65), "%1.65%", float64(1.65), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic]."},
|
||||
Warnings: []string{"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -622,7 +622,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
WhereClause: "(has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Nullable(Float64))'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?) OR has(arrayFlatten(arrayConcat(arrayMap(`body_v2.education`->dynamicElement(`body_v2.education`.`parameters`, 'Array(Dynamic)'), dynamicElement(body_v2.`education`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))')))), ?))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), 1.65, 1.65, "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{
|
||||
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64 name=education[].parameters,context=body,datatype=[]dynamic].",
|
||||
"Key `education[].parameters` is ambiguous, found 2 different combinations of field context / data type: [name=education[].parameters,context=body,datatype=[]float64,jsondatatype=Array(Nullable(Float64)) name=education[].parameters,context=body,datatype=[]dynamic,jsondatatype=Array(Dynamic)].",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -702,7 +702,7 @@ func TestJSONStmtBuilder_ArrayPaths(t *testing.T) {
|
||||
expected: TestExpected{
|
||||
WhereClause: "(((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(toString(x)) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))')) OR arrayExists(x -> toFloat64(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(Int64))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')) OR ((arrayExists(`body_v2.interests`-> arrayExists(`body_v2.interests[].entities`-> arrayExists(`body_v2.interests[].entities[].reviews`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata`-> arrayExists(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`-> (arrayExists(x -> LOWER(x) LIKE LOWER(?), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))')) OR arrayExists(x -> toFloat64OrNull(x) = ?, dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata[].positions`.`ratings`, 'Array(Nullable(String))'))), dynamicElement(`body_v2.interests[].entities[].reviews[].entries[].metadata`.`positions`, 'Array(JSON(max_dynamic_types=0, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews[].entries`.`metadata`, 'Array(JSON(max_dynamic_types=1, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities[].reviews`.`entries`, 'Array(JSON(max_dynamic_types=2, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests[].entities`.`reviews`, 'Array(JSON(max_dynamic_types=4, max_dynamic_paths=0))')), dynamicElement(`body_v2.interests`.`entities`, 'Array(JSON(max_dynamic_types=8, max_dynamic_paths=0))')), dynamicElement(body_v2.`interests`, 'Array(JSON(max_dynamic_types=16, max_dynamic_paths=0))'))) AND has(JSONAllPaths(body_v2), 'interests')))",
|
||||
Args: []any{uint64(1747945619), uint64(1747983448), "%4%", float64(4), "%4%", float64(4), "1747947419000000000", uint64(1747945619), "1747983448000000000", uint64(1747983448), 10},
|
||||
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64 name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string]."},
|
||||
Warnings: []string{"Key `interests[].entities[].reviews[].entries[].metadata[].positions[].ratings` is ambiguous, found 2 different combinations of field context / data type: [name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]int64,jsondatatype=Array(Nullable(Int64)) name=interests[].entities[].reviews[].entries[].metadata[].positions[].ratings,context=body,datatype=[]string,jsondatatype=Array(Nullable(String))]."},
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -946,16 +946,16 @@ func buildTestTelemetryMetadataStore(t *testing.T, addIndexes bool) *telemetryty
|
||||
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
mockMetadataStore.SetStaticFields(IntrinsicFields)
|
||||
types, _ := telemetrytypes.TestJSONTypeSet()
|
||||
for path, fieldDataTypes := range types {
|
||||
for _, fdt := range fieldDataTypes {
|
||||
for path, jsonTypes := range types {
|
||||
for _, jsonType := range jsonTypes {
|
||||
key := &telemetrytypes.TelemetryFieldKey{
|
||||
Name: path,
|
||||
Signal: telemetrytypes.SignalLogs,
|
||||
FieldContext: telemetrytypes.FieldContextBody,
|
||||
FieldDataType: fdt,
|
||||
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[jsonType],
|
||||
JSONDataType: &jsonType,
|
||||
}
|
||||
if addIndexes {
|
||||
jsonType := telemetrytypes.MappingFieldDataTypeToJSONDataType[fdt]
|
||||
idx := slices.IndexFunc(telemetrytypes.TestIndexedPaths, func(entry telemetrytypes.TestIndexedPathEntry) bool {
|
||||
return entry.Path == path && entry.Type == jsonType
|
||||
})
|
||||
|
||||
@@ -875,6 +875,7 @@ func TestAdjustKey(t *testing.T) {
|
||||
require.Equal(t, c.expectedKey.FieldContext, key.FieldContext, "field context should match")
|
||||
require.Equal(t, c.expectedKey.FieldDataType, key.FieldDataType, "field data type should match")
|
||||
require.Equal(t, c.expectedKey.Materialized, key.Materialized, "materialized should match")
|
||||
require.Equal(t, c.expectedKey.JSONDataType, key.JSONDataType, "json data type should match")
|
||||
require.Equal(t, c.expectedKey.Indexes, key.Indexes, "json exists should match")
|
||||
})
|
||||
}
|
||||
|
||||
@@ -21,72 +21,133 @@ import (
|
||||
)
|
||||
|
||||
var (
|
||||
defaultPathLimit = 100 // Default limit to prevent full table scans
|
||||
|
||||
CodeUnknownJSONDataType = errors.MustNewCode("unknown_json_data_type")
|
||||
CodeFailLoadPromotedPaths = errors.MustNewCode("fail_load_promoted_paths")
|
||||
CodeFailCheckPathPromoted = errors.MustNewCode("fail_check_path_promoted")
|
||||
CodeFailIterateBodyJSONKeys = errors.MustNewCode("fail_iterate_body_json_keys")
|
||||
CodeFailExtractBodyJSONKeys = errors.MustNewCode("fail_extract_body_json_keys")
|
||||
CodeFailLoadLogsJSONIndexes = errors.MustNewCode("fail_load_logs_json_indexes")
|
||||
CodeFailListJSONValues = errors.MustNewCode("fail_list_json_values")
|
||||
CodeFailScanJSONValue = errors.MustNewCode("fail_scan_json_value")
|
||||
CodeFailScanVariant = errors.MustNewCode("fail_scan_variant")
|
||||
CodeFailBuildJSONPathsQuery = errors.MustNewCode("fail_build_json_paths_query")
|
||||
CodeNoPathsToQueryIndexes = errors.MustNewCode("no_paths_to_query_indexes_provided")
|
||||
|
||||
CodeFailedToPrepareBatch = errors.MustNewCode("failed_to_prepare_batch_promoted_paths")
|
||||
CodeFailedToSendBatch = errors.MustNewCode("failed_to_send_batch_promoted_paths")
|
||||
CodeFailedToAppendPath = errors.MustNewCode("failed_to_append_path_promoted_paths")
|
||||
)
|
||||
|
||||
// enrichBodyKeys enriches body-context keys with promoted path info, indexes,
|
||||
// and JSON access plans.
|
||||
// parentTypeCache contains parent array types (ArrayJSON/ArrayDynamic) pre-fetched in the main UNION query.
|
||||
func (t *telemetryMetaStore) enrichBodyKeys(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey, parentTypeCache map[string][]telemetrytypes.FieldDataType) error {
|
||||
if len(keys) == 0 {
|
||||
return nil
|
||||
}
|
||||
// fetchBodyJSONPaths extracts body JSON paths from the path_types table
|
||||
// This function can be used by both JSONQueryBuilder and metadata extraction
|
||||
// uniquePathLimit: 0 for no limit, >0 for maximum number of unique paths to return
|
||||
// - For startup load: set to 10000 to get top 10k unique paths
|
||||
// - For lookup: set to 0 (no limit needed for single path)
|
||||
// - For metadata API: set to desired pagination limit
|
||||
//
|
||||
// searchOperator: LIKE for pattern matching, EQUAL for exact match.
|
||||
func (t *telemetryMetaStore) fetchBodyJSONPaths(ctx context.Context,
|
||||
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, []string, bool, error) {
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.TelemetrySignal: telemetrytypes.SignalLogs.StringValue(),
|
||||
instrumentationtypes.CodeNamespace: "metadata",
|
||||
instrumentationtypes.CodeFunctionName: "fetchBodyJSONPaths",
|
||||
})
|
||||
|
||||
var filteredKeys []*telemetrytypes.TelemetryFieldKey
|
||||
for _, key := range keys {
|
||||
if key.FieldContext == telemetrytypes.FieldContextBody {
|
||||
filteredKeys = append(filteredKeys, key)
|
||||
query, args, limit := buildGetBodyJSONPathsQuery(fieldKeySelectors)
|
||||
rows, err := t.telemetrystore.ClickhouseDB().Query(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to extract body JSON keys")
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
fieldKeys := []*telemetrytypes.TelemetryFieldKey{}
|
||||
paths := []string{}
|
||||
rowCount := 0
|
||||
for rows.Next() {
|
||||
var path string
|
||||
var typesArray []string // ClickHouse returns array as []string
|
||||
var lastSeen uint64
|
||||
|
||||
err = rows.Scan(&path, &typesArray, &lastSeen)
|
||||
if err != nil {
|
||||
return nil, nil, false, errors.WrapInternalf(err, CodeFailExtractBodyJSONKeys, "failed to scan body JSON key row")
|
||||
}
|
||||
|
||||
for _, typ := range typesArray {
|
||||
mapping, found := telemetrytypes.MappingStringToJSONDataType[typ]
|
||||
if !found {
|
||||
t.logger.ErrorContext(ctx, "failed to map type string to JSON data type", slog.String("type", typ), slog.String("path", path))
|
||||
continue
|
||||
}
|
||||
fieldKeys = append(fieldKeys, &telemetrytypes.TelemetryFieldKey{
|
||||
Name: path,
|
||||
Signal: telemetrytypes.SignalLogs,
|
||||
FieldContext: telemetrytypes.FieldContextBody,
|
||||
FieldDataType: telemetrytypes.MappingJSONDataTypeToFieldDataType[mapping],
|
||||
JSONDataType: &mapping,
|
||||
})
|
||||
}
|
||||
|
||||
paths = append(paths, path)
|
||||
rowCount++
|
||||
}
|
||||
if rows.Err() != nil {
|
||||
return nil, nil, false, errors.WrapInternalf(rows.Err(), CodeFailIterateBodyJSONKeys, "error iterating body JSON keys")
|
||||
}
|
||||
|
||||
// collect paths for batch queries
|
||||
paths := make([]string, 0, len(filteredKeys))
|
||||
for _, key := range filteredKeys {
|
||||
paths = append(paths, key.Name)
|
||||
}
|
||||
|
||||
// fetch promoted paths
|
||||
promoted, err := t.GetPromotedPaths(ctx, paths...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// fetch JSON path indexes
|
||||
indexes, err := t.getJSONPathIndexes(ctx, paths...)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// apply promoted/index metadata to keys
|
||||
for _, key := range filteredKeys {
|
||||
promotedKey := strings.Split(key.Name, telemetrytypes.ArraySep)[0]
|
||||
key.Materialized = promoted[promotedKey]
|
||||
key.Indexes = indexes[key.Name]
|
||||
}
|
||||
|
||||
// build JSON access plans using the pre-fetched parent type cache
|
||||
return t.buildJSONPlans(ctx, filteredKeys, parentTypeCache)
|
||||
return fieldKeys, paths, rowCount <= limit, nil
|
||||
}
|
||||
|
||||
// buildJSONPlans builds JSON access plans for the given keys
|
||||
// using the provided parent type cache (pre-fetched in the main UNION query).
|
||||
func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetrytypes.TelemetryFieldKey, typeCache map[string][]telemetrytypes.FieldDataType) error {
|
||||
if len(keys) == 0 {
|
||||
return nil
|
||||
func (t *telemetryMetaStore) buildBodyJSONPaths(ctx context.Context,
|
||||
fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
|
||||
|
||||
fieldKeys, paths, finished, err := t.fetchBodyJSONPaths(ctx, fieldKeySelectors)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
columnMeta := t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody]
|
||||
promoted, err := t.GetPromotedPaths(ctx, paths...)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
indexes, err := t.getJSONPathIndexes(ctx, paths...)
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
|
||||
for _, fieldKey := range fieldKeys {
|
||||
promotedKey := strings.Split(fieldKey.Name, telemetrytypes.ArraySep)[0]
|
||||
fieldKey.Materialized = promoted[promotedKey]
|
||||
fieldKey.Indexes = indexes[fieldKey.Name]
|
||||
}
|
||||
|
||||
return fieldKeys, finished, t.buildJSONPlans(ctx, fieldKeys)
|
||||
}
|
||||
|
||||
func (t *telemetryMetaStore) buildJSONPlans(ctx context.Context, keys []*telemetrytypes.TelemetryFieldKey) error {
|
||||
parentSelectors := make([]*telemetrytypes.FieldKeySelector, 0, len(keys))
|
||||
for _, key := range keys {
|
||||
if err := key.SetJSONAccessPlan(columnMeta, typeCache); err != nil {
|
||||
parentSelectors = append(parentSelectors, key.ArrayParentSelectors()...)
|
||||
}
|
||||
|
||||
parentKeys, _, _, err := t.fetchBodyJSONPaths(ctx, parentSelectors)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
typeCache := make(map[string][]telemetrytypes.JSONDataType)
|
||||
for _, key := range parentKeys {
|
||||
typeCache[key.Name] = append(typeCache[key.Name], *key.JSONDataType)
|
||||
}
|
||||
|
||||
// build plans for keys now
|
||||
for _, key := range keys {
|
||||
err = key.SetJSONAccessPlan(t.jsonColumnMetadata[telemetrytypes.SignalLogs][telemetrytypes.FieldContextBody], typeCache)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -94,6 +155,51 @@ func (t *telemetryMetaStore) buildJSONPlans(_ context.Context, keys []*telemetry
|
||||
return nil
|
||||
}
|
||||
|
||||
func buildGetBodyJSONPathsQuery(fieldKeySelectors []*telemetrytypes.FieldKeySelector) (string, []any, int) {
|
||||
if len(fieldKeySelectors) == 0 {
|
||||
return "", nil, defaultPathLimit
|
||||
}
|
||||
from := fmt.Sprintf("%s.%s", DBName, PathTypesTableName)
|
||||
|
||||
// Build a better query using GROUP BY to deduplicate at database level
|
||||
// This aggregates all types per path and gets the max last_seen, then applies LIMIT
|
||||
sb := sqlbuilder.Select(
|
||||
"path",
|
||||
"groupArray(DISTINCT type) AS types",
|
||||
"max(last_seen) AS last_seen",
|
||||
).From(from)
|
||||
|
||||
limit := 0
|
||||
// Add search filter if provided
|
||||
orClauses := []string{}
|
||||
for _, fieldKeySelector := range fieldKeySelectors {
|
||||
// replace [*] with []
|
||||
fieldKeySelector.Name = strings.ReplaceAll(fieldKeySelector.Name, telemetrytypes.ArrayAnyIndex, telemetrytypes.ArraySep)
|
||||
// Extract search text for body JSON keys
|
||||
keyName := CleanPathPrefixes(fieldKeySelector.Name)
|
||||
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
|
||||
orClauses = append(orClauses, sb.Equal("path", keyName))
|
||||
} else {
|
||||
// Pattern matching for metadata API (defaults to LIKE behavior for other operators)
|
||||
orClauses = append(orClauses, sb.ILike("path", fmt.Sprintf("%%%s%%", querybuilder.FormatValueForContains(keyName))))
|
||||
}
|
||||
limit += fieldKeySelector.Limit
|
||||
}
|
||||
sb.Where(sb.Or(orClauses...))
|
||||
// Group by path to get unique paths with aggregated types
|
||||
sb.GroupBy("path")
|
||||
|
||||
// Order by max last_seen to get most recent paths first
|
||||
sb.OrderBy("last_seen DESC")
|
||||
if limit == 0 {
|
||||
limit = defaultPathLimit
|
||||
}
|
||||
sb.Limit(limit)
|
||||
|
||||
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
return query, args, limit
|
||||
}
|
||||
|
||||
func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...string) (map[string][]telemetrytypes.JSONDataTypeIndex, error) {
|
||||
filteredPaths := []string{}
|
||||
for _, path := range paths {
|
||||
|
||||
@@ -7,9 +7,99 @@ import (
|
||||
"github.com/SigNoz/signoz-otel-collector/constants"
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrylogs"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestBuildGetBodyJSONPathsQuery(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
fieldKeySelectors []*telemetrytypes.FieldKeySelector
|
||||
expectedSQL string
|
||||
expectedArgs []any
|
||||
expectedLimit int
|
||||
}{
|
||||
|
||||
{
|
||||
name: "Single search text with EQUAL operator",
|
||||
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
|
||||
{
|
||||
Name: "user.name",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
|
||||
},
|
||||
},
|
||||
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
|
||||
expectedArgs: []any{"user.name", defaultPathLimit},
|
||||
expectedLimit: defaultPathLimit,
|
||||
},
|
||||
{
|
||||
name: "Single search text with LIKE operator",
|
||||
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
|
||||
{
|
||||
Name: "user",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
|
||||
},
|
||||
},
|
||||
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
|
||||
expectedArgs: []any{"%user%", 100},
|
||||
expectedLimit: 100,
|
||||
},
|
||||
{
|
||||
name: "Multiple search texts with EQUAL operator",
|
||||
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
|
||||
{
|
||||
Name: "user.name",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
|
||||
},
|
||||
{
|
||||
Name: "user.age",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeExact,
|
||||
},
|
||||
},
|
||||
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (path = ? OR path = ?) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
|
||||
expectedArgs: []any{"user.name", "user.age", defaultPathLimit},
|
||||
expectedLimit: defaultPathLimit,
|
||||
},
|
||||
{
|
||||
name: "Multiple search texts with LIKE operator",
|
||||
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
|
||||
{
|
||||
Name: "user",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
|
||||
},
|
||||
{
|
||||
Name: "admin",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
|
||||
},
|
||||
},
|
||||
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?) OR LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
|
||||
expectedArgs: []any{"%user%", "%admin%", defaultPathLimit},
|
||||
expectedLimit: defaultPathLimit,
|
||||
},
|
||||
{
|
||||
name: "Search with Contains operator (should default to LIKE)",
|
||||
fieldKeySelectors: []*telemetrytypes.FieldKeySelector{
|
||||
{
|
||||
Name: "test",
|
||||
SelectorMatchType: telemetrytypes.FieldSelectorMatchTypeFuzzy,
|
||||
},
|
||||
},
|
||||
expectedSQL: "SELECT path, groupArray(DISTINCT type) AS types, max(last_seen) AS last_seen FROM signoz_metadata.distributed_json_path_types WHERE (LOWER(path) LIKE LOWER(?)) GROUP BY path ORDER BY last_seen DESC LIMIT ?",
|
||||
expectedArgs: []any{"%test%", defaultPathLimit},
|
||||
expectedLimit: defaultPathLimit,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
query, args, limit := buildGetBodyJSONPathsQuery(tc.fieldKeySelectors)
|
||||
require.Equal(t, tc.expectedSQL, query)
|
||||
require.Equal(t, tc.expectedArgs, args)
|
||||
require.Equal(t, tc.expectedLimit, limit)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildListLogsJSONIndexesQuery(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
|
||||
@@ -368,19 +368,6 @@ func (t *telemetryMetaStore) logsTblStatementToFieldKeys(ctx context.Context) ([
|
||||
return materialisedKeys, nil
|
||||
}
|
||||
|
||||
// logKeysUnionArm declares one arm of the UNION ALL in getLogsKeys.
|
||||
// All per-table variance is captured here so the loop body can stay uniform.
|
||||
type logKeysUnionArm struct {
|
||||
shouldQuery bool
|
||||
fieldContext telemetrytypes.FieldContext
|
||||
table string
|
||||
dataTypeColumn string // column used in WHERE/GROUP BY
|
||||
dataTypeSelectExpr string // expression used in SELECT (may wrap with lower())
|
||||
addBaseFilters func(sb *sqlbuilder.SelectBuilder) // mandatory WHERE filters (e.g., signal, field_context)
|
||||
encodeDataType func(telemetrytypes.FieldDataType) string // how to render a FieldDataType in WHERE values
|
||||
extraOrBranch func(sb *sqlbuilder.SelectBuilder) string // optional extra OR branch (e.g., body parent-types)
|
||||
}
|
||||
|
||||
// getLogsKeys returns the keys from the spans that match the field selection criteria.
|
||||
func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors []*telemetrytypes.FieldKeySelector) ([]*telemetrytypes.TelemetryFieldKey, bool, error) {
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
@@ -410,149 +397,90 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
|
||||
// tables to query based on field selectors
|
||||
queryAttributeTable := false
|
||||
queryResourceTable := false
|
||||
queryBodyTable := false
|
||||
|
||||
for _, selector := range fieldKeySelectors {
|
||||
if selector.FieldContext == telemetrytypes.FieldContextUnspecified {
|
||||
// unspecified context, query all tables
|
||||
// unspecified context, query both tables
|
||||
queryAttributeTable = true
|
||||
queryResourceTable = true
|
||||
queryBodyTable = true
|
||||
break
|
||||
} else if selector.FieldContext == telemetrytypes.FieldContextAttribute {
|
||||
queryAttributeTable = true
|
||||
} else if selector.FieldContext == telemetrytypes.FieldContextResource {
|
||||
queryResourceTable = true
|
||||
} else if selector.FieldContext == telemetrytypes.FieldContextBody && querybuilder.BodyJSONQueryEnabled {
|
||||
queryBodyTable = true
|
||||
}
|
||||
}
|
||||
|
||||
// body keys are gated behind the feature flag
|
||||
queryBodyTable = queryBodyTable && querybuilder.BodyJSONQueryEnabled
|
||||
|
||||
// pre-compute parent array path names from body selectors for JSON plan building;
|
||||
// these will be fetched as a separate UNION arm filtered to ArrayJSON/ArrayDynamic only
|
||||
parentPaths := make(map[string]bool)
|
||||
if queryBodyTable {
|
||||
for _, sel := range fieldKeySelectors {
|
||||
if sel.FieldContext != telemetrytypes.FieldContextBody &&
|
||||
sel.FieldContext != telemetrytypes.FieldContextUnspecified {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(sel.Name, telemetrytypes.ArraySep) {
|
||||
continue
|
||||
}
|
||||
key := &telemetrytypes.TelemetryFieldKey{
|
||||
Name: sel.Name,
|
||||
Signal: telemetrytypes.SignalLogs,
|
||||
FieldContext: telemetrytypes.FieldContextBody,
|
||||
}
|
||||
for _, ps := range key.ArrayParentSelectors() {
|
||||
parentPaths[ps.Name] = true
|
||||
}
|
||||
}
|
||||
tablesToQuery := []struct {
|
||||
fieldContext telemetrytypes.FieldContext
|
||||
shouldQuery bool
|
||||
}{
|
||||
{telemetrytypes.FieldContextAttribute, queryAttributeTable},
|
||||
{telemetrytypes.FieldContextResource, queryResourceTable},
|
||||
}
|
||||
|
||||
// Each UNION arm differs only in: table, data-type column name and SELECT
|
||||
// expression (lower-wrapped for historical mixed-case in attr/resource),
|
||||
// base WHERE filters, the per-selector data-type encoding, and (for body)
|
||||
// an extra OR branch that fetches parent array types for JSON plan building.
|
||||
// All other logic is shared by the loop below.
|
||||
tablesToQuery := []logKeysUnionArm{
|
||||
{
|
||||
shouldQuery: queryAttributeTable,
|
||||
fieldContext: telemetrytypes.FieldContextAttribute,
|
||||
table: t.logsDBName + "." + t.logAttributeKeysTblName,
|
||||
dataTypeColumn: "datatype",
|
||||
dataTypeSelectExpr: "lower(datatype)",
|
||||
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
|
||||
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
|
||||
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
|
||||
},
|
||||
{
|
||||
shouldQuery: queryResourceTable,
|
||||
fieldContext: telemetrytypes.FieldContextResource,
|
||||
table: t.logsDBName + "." + t.logResourceKeysTblName,
|
||||
dataTypeColumn: "datatype",
|
||||
dataTypeSelectExpr: "lower(datatype)",
|
||||
addBaseFilters: func(*sqlbuilder.SelectBuilder) {},
|
||||
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.TagDataType() },
|
||||
extraOrBranch: func(*sqlbuilder.SelectBuilder) string { return "" },
|
||||
},
|
||||
{
|
||||
shouldQuery: queryBodyTable,
|
||||
fieldContext: telemetrytypes.FieldContextBody,
|
||||
table: fmt.Sprintf("%s.%s", DBName, FieldKeysTable),
|
||||
dataTypeColumn: "field_data_type",
|
||||
dataTypeSelectExpr: "field_data_type",
|
||||
addBaseFilters: func(sb *sqlbuilder.SelectBuilder) {
|
||||
sb.Where(sb.E("signal", telemetrytypes.SignalLogs.StringValue()))
|
||||
sb.Where(sb.E("field_context", telemetrytypes.FieldContextBody.StringValue()))
|
||||
},
|
||||
encodeDataType: func(ft telemetrytypes.FieldDataType) string { return ft.StringValue() },
|
||||
extraOrBranch: func(sb *sqlbuilder.SelectBuilder) string {
|
||||
if len(parentPaths) == 0 {
|
||||
return ""
|
||||
}
|
||||
names := make([]any, 0, len(parentPaths))
|
||||
for n := range parentPaths {
|
||||
names = append(names, n)
|
||||
}
|
||||
return sb.And(
|
||||
sb.In("name", names...),
|
||||
sb.In("field_data_type",
|
||||
telemetrytypes.FieldDataTypeArrayDynamic.StringValue(),
|
||||
telemetrytypes.FieldDataTypeArrayJSON.StringValue(),
|
||||
),
|
||||
)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, arm := range tablesToQuery {
|
||||
if !arm.shouldQuery {
|
||||
for _, table := range tablesToQuery {
|
||||
if !table.shouldQuery {
|
||||
continue
|
||||
}
|
||||
|
||||
fieldContext := table.fieldContext
|
||||
|
||||
// table name based on field context
|
||||
var tblName string
|
||||
if fieldContext == telemetrytypes.FieldContextAttribute {
|
||||
tblName = t.logsDBName + "." + t.logAttributeKeysTblName
|
||||
} else {
|
||||
tblName = t.logsDBName + "." + t.logResourceKeysTblName
|
||||
}
|
||||
|
||||
sb := sqlbuilder.Select(
|
||||
"name AS tag_key",
|
||||
fmt.Sprintf("'%s' AS tag_type", arm.fieldContext.TagType()),
|
||||
arm.dataTypeSelectExpr+" AS tag_data_type",
|
||||
fmt.Sprintf("%d AS priority", getPriorityForContext(arm.fieldContext)),
|
||||
).From(arm.table)
|
||||
fmt.Sprintf("'%s' AS tag_type", fieldContext.TagType()),
|
||||
"lower(datatype) AS tag_data_type", // in logs, we had some historical data with capital and small case
|
||||
fmt.Sprintf(`%d AS priority`, getPriorityForContext(fieldContext)),
|
||||
).From(tblName)
|
||||
|
||||
arm.addBaseFilters(sb)
|
||||
var limit int
|
||||
conds := []string{}
|
||||
|
||||
branches := []string{}
|
||||
for _, sel := range fieldKeySelectors {
|
||||
if sel.FieldContext != telemetrytypes.FieldContextUnspecified && sel.FieldContext != arm.fieldContext {
|
||||
for _, fieldKeySelector := range fieldKeySelectors {
|
||||
// Include this selector if:
|
||||
// 1. It has unspecified context (matches all tables)
|
||||
// 2. Its context matches the current table's context
|
||||
if fieldKeySelector.FieldContext != telemetrytypes.FieldContextUnspecified &&
|
||||
fieldKeySelector.FieldContext != fieldContext {
|
||||
continue
|
||||
}
|
||||
parts := []string{}
|
||||
if sel.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
|
||||
parts = append(parts, sb.E("name", sel.Name))
|
||||
|
||||
// key part of the selector
|
||||
fieldKeyConds := []string{}
|
||||
if fieldKeySelector.SelectorMatchType == telemetrytypes.FieldSelectorMatchTypeExact {
|
||||
fieldKeyConds = append(fieldKeyConds, sb.E("name", fieldKeySelector.Name))
|
||||
} else {
|
||||
parts = append(parts, sb.ILike("name", "%"+escapeForLike(sel.Name)+"%"))
|
||||
fieldKeyConds = append(fieldKeyConds, sb.ILike("name", "%"+escapeForLike(fieldKeySelector.Name)+"%"))
|
||||
}
|
||||
if sel.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
|
||||
parts = append(parts, sb.E(arm.dataTypeColumn, arm.encodeDataType(sel.FieldDataType)))
|
||||
|
||||
// now look at the field data type
|
||||
if fieldKeySelector.FieldDataType != telemetrytypes.FieldDataTypeUnspecified {
|
||||
fieldKeyConds = append(fieldKeyConds, sb.E("datatype", fieldKeySelector.FieldDataType.TagDataType()))
|
||||
}
|
||||
if len(parts) > 0 {
|
||||
branches = append(branches, sb.And(parts...))
|
||||
|
||||
if len(fieldKeyConds) > 0 {
|
||||
conds = append(conds, sb.And(fieldKeyConds...))
|
||||
}
|
||||
limit += fieldKeySelector.Limit
|
||||
}
|
||||
|
||||
if extra := arm.extraOrBranch(sb); extra != "" {
|
||||
branches = append(branches, extra)
|
||||
if len(conds) > 0 {
|
||||
sb.Where(sb.Or(conds...))
|
||||
}
|
||||
|
||||
if len(branches) > 0 {
|
||||
sb.Where(sb.Or(branches...))
|
||||
sb.GroupBy("name", "datatype")
|
||||
if limit == 0 {
|
||||
limit = 1000
|
||||
}
|
||||
|
||||
sb.GroupBy("name", arm.dataTypeColumn)
|
||||
|
||||
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
queries = append(queries, query)
|
||||
allArgs = append(allArgs, args...)
|
||||
@@ -589,7 +517,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
|
||||
defer rows.Close()
|
||||
|
||||
keys := []*telemetrytypes.TelemetryFieldKey{}
|
||||
parentTypeCache := make(map[string][]telemetrytypes.FieldDataType)
|
||||
rowCount := 0
|
||||
searchTexts := []string{}
|
||||
|
||||
@@ -613,17 +540,6 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
|
||||
if err != nil {
|
||||
return nil, false, errors.Wrap(err, errors.TypeInternal, errors.CodeInternal, ErrFailedToGetLogsKeys.Error())
|
||||
}
|
||||
|
||||
// body keys with ArrayJSON/ArrayDynamic types are internal container types
|
||||
// used only for JSON access plan building; route to parentTypeCache, not to results
|
||||
switch fieldDataType {
|
||||
case telemetrytypes.FieldDataTypeArrayJSON, telemetrytypes.FieldDataTypeArrayDynamic:
|
||||
if fieldContext == telemetrytypes.FieldContextBody && parentPaths[name] {
|
||||
parentTypeCache[name] = append(parentTypeCache[name], fieldDataType)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
key, ok := mapOfKeys[name+";"+fieldContext.StringValue()+";"+fieldDataType.StringValue()]
|
||||
|
||||
// if there is no materialised column, create a key with the field context and data type
|
||||
@@ -677,11 +593,13 @@ func (t *telemetryMetaStore) getLogsKeys(ctx context.Context, fieldKeySelectors
|
||||
}
|
||||
}
|
||||
|
||||
// enrich body keys with promoted paths, indexes, and JSON access plans
|
||||
if querybuilder.BodyJSONQueryEnabled {
|
||||
if err := t.enrichBodyKeys(ctx, keys, parentTypeCache); err != nil {
|
||||
t.logger.ErrorContext(ctx, "failed to enrich body JSON keys", errors.Attr(err))
|
||||
bodyJSONPaths, finished, err := t.buildBodyJSONPaths(ctx, fieldKeySelectors) // LIKE for pattern matching
|
||||
if err != nil {
|
||||
t.logger.ErrorContext(ctx, "failed to extract body JSON paths", errors.Attr(err))
|
||||
}
|
||||
keys = append(keys, bodyJSONPaths...)
|
||||
complete = complete && finished
|
||||
}
|
||||
|
||||
if _, err := t.updateColumnEvolutionMetadataForKeys(ctx, keys); err != nil {
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
package telemetrymetadata
|
||||
|
||||
import otelcollectorconst "github.com/SigNoz/signoz-otel-collector/constants"
|
||||
|
||||
const (
|
||||
DBName = "signoz_metadata"
|
||||
AttributesMetadataTableName = "distributed_attributes_metadata"
|
||||
AttributesMetadataLocalTableName = "attributes_metadata"
|
||||
ColumnEvolutionMetadataTableName = "distributed_column_evolution_metadata"
|
||||
FieldKeysTable = "distributed_field_keys"
|
||||
PathTypesTableName = otelcollectorconst.DistributedPathTypesTable
|
||||
// Column Evolution table stores promoted paths as (signal, column_name, field_context, field_name); see signoz-otel-collector metadata_migrations.
|
||||
PromotedPathsTableName = "distributed_column_evolution_metadata"
|
||||
SkipIndexTableName = "system.data_skipping_indices"
|
||||
|
||||
@@ -37,6 +37,7 @@ type TelemetryFieldKey struct {
|
||||
FieldContext FieldContext `json:"fieldContext,omitzero"`
|
||||
FieldDataType FieldDataType `json:"fieldDataType,omitzero"`
|
||||
|
||||
JSONDataType *JSONDataType `json:"-"`
|
||||
JSONPlan JSONAccessPlan `json:"-"`
|
||||
Indexes []JSONDataTypeIndex `json:"-"`
|
||||
Materialized bool `json:"-"` // refers to promoted in case of body.... fields
|
||||
@@ -79,12 +80,6 @@ func (f *TelemetryFieldKey) ArrayParentSelectors() []*FieldKeySelector {
|
||||
return selectors
|
||||
}
|
||||
|
||||
// GetJSONDataType derives the JSONDataType from FieldDataType.
|
||||
// Callers should check FieldDataType != FieldDataTypeUnspecified before calling.
|
||||
func (f *TelemetryFieldKey) GetJSONDataType() JSONDataType {
|
||||
return MappingFieldDataTypeToJSONDataType[f.FieldDataType]
|
||||
}
|
||||
|
||||
func (f TelemetryFieldKey) String() string {
|
||||
var sb strings.Builder
|
||||
fmt.Fprintf(&sb, "name=%s", f.Name)
|
||||
@@ -97,6 +92,9 @@ func (f TelemetryFieldKey) String() string {
|
||||
if f.Materialized {
|
||||
sb.WriteString(",materialized=true")
|
||||
}
|
||||
if f.JSONDataType != nil {
|
||||
fmt.Fprintf(&sb, ",jsondatatype=%s", f.JSONDataType.StringValue())
|
||||
}
|
||||
if len(f.Indexes) > 0 {
|
||||
sb.WriteString(",indexes=[")
|
||||
for i, index := range f.Indexes {
|
||||
@@ -119,6 +117,7 @@ func (f TelemetryFieldKey) Text() string {
|
||||
func (f *TelemetryFieldKey) OverrideMetadataFrom(src *TelemetryFieldKey) {
|
||||
f.FieldContext = src.FieldContext
|
||||
f.FieldDataType = src.FieldDataType
|
||||
f.JSONDataType = src.JSONDataType
|
||||
f.Indexes = src.Indexes
|
||||
f.Materialized = src.Materialized
|
||||
f.JSONPlan = src.JSONPlan
|
||||
|
||||
@@ -31,7 +31,7 @@ var (
|
||||
FieldDataTypeArrayInt64 = FieldDataType{valuer.NewString("[]int64")}
|
||||
FieldDataTypeArrayNumber = FieldDataType{valuer.NewString("[]number")}
|
||||
|
||||
FieldDataTypeArrayJSON = FieldDataType{valuer.NewString("[]object")}
|
||||
FieldDataTypeArrayObject = FieldDataType{valuer.NewString("[]object")}
|
||||
FieldDataTypeArrayDynamic = FieldDataType{valuer.NewString("[]dynamic")}
|
||||
|
||||
// Map string representations to FieldDataType values
|
||||
@@ -72,8 +72,6 @@ var (
|
||||
"[]float64": FieldDataTypeArrayFloat64,
|
||||
"[]number": FieldDataTypeArrayNumber,
|
||||
"[]bool": FieldDataTypeArrayBool,
|
||||
"[]json": FieldDataTypeArrayJSON,
|
||||
"[]dynamic": FieldDataTypeArrayDynamic,
|
||||
|
||||
// c-style array types
|
||||
"string[]": FieldDataTypeArrayString,
|
||||
@@ -81,8 +79,6 @@ var (
|
||||
"float64[]": FieldDataTypeArrayFloat64,
|
||||
"number[]": FieldDataTypeArrayNumber,
|
||||
"bool[]": FieldDataTypeArrayBool,
|
||||
"json[]": FieldDataTypeArrayJSON,
|
||||
"dynamic[]": FieldDataTypeArrayDynamic,
|
||||
}
|
||||
|
||||
fieldDataTypeToCHDataType = map[FieldDataType]string{
|
||||
|
||||
@@ -43,7 +43,7 @@ type JSONAccessNode struct {
|
||||
isRoot bool // marked true for only body_v2 and body_promoted
|
||||
|
||||
// Precomputed type information (single source of truth)
|
||||
AvailableTypes []FieldDataType
|
||||
AvailableTypes []JSONDataType
|
||||
|
||||
// Array type branches (Array(JSON) vs Array(Dynamic))
|
||||
Branches map[JSONAccessBranchType]*JSONAccessNode
|
||||
@@ -106,7 +106,7 @@ type planBuilder struct {
|
||||
paths []string // cumulative paths for type cache lookups
|
||||
segments []string // individual path segments for node names
|
||||
isPromoted bool
|
||||
typeCache map[string][]FieldDataType
|
||||
typeCache map[string][]JSONDataType
|
||||
}
|
||||
|
||||
// buildPlan recursively builds the path plan tree.
|
||||
@@ -155,14 +155,14 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
|
||||
MaxDynamicPaths: maxPaths,
|
||||
}
|
||||
|
||||
hasJSON := slices.Contains(node.AvailableTypes, FieldDataTypeArrayJSON)
|
||||
hasDynamic := slices.Contains(node.AvailableTypes, FieldDataTypeArrayDynamic)
|
||||
hasJSON := slices.Contains(node.AvailableTypes, ArrayJSON)
|
||||
hasDynamic := slices.Contains(node.AvailableTypes, ArrayDynamic)
|
||||
|
||||
// Configure terminal if this is the last part
|
||||
if isTerminal {
|
||||
node.TerminalConfig = &TerminalConfig{
|
||||
Key: pb.key,
|
||||
ElemType: pb.key.GetJSONDataType(),
|
||||
ElemType: *pb.key.JSONDataType,
|
||||
}
|
||||
} else {
|
||||
var err error
|
||||
@@ -185,7 +185,7 @@ func (pb *planBuilder) buildPlan(index int, parent *JSONAccessNode, isDynArrChil
|
||||
|
||||
// buildJSONAccessPlan builds a tree structure representing the complete JSON path traversal
|
||||
// that precomputes all possible branches and their types.
|
||||
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]FieldDataType,
|
||||
func (key *TelemetryFieldKey) SetJSONAccessPlan(columnInfo JSONColumnMetadata, typeCache map[string][]JSONDataType,
|
||||
) error {
|
||||
// if path is empty, return nil
|
||||
if key.Name == "" {
|
||||
|
||||
@@ -19,11 +19,11 @@ const (
|
||||
// ============================================================================
|
||||
|
||||
// makeKey creates a TelemetryFieldKey for testing.
|
||||
func makeKey(name string, dataType FieldDataType, materialized bool) *TelemetryFieldKey {
|
||||
func makeKey(name string, dataType JSONDataType, materialized bool) *TelemetryFieldKey {
|
||||
return &TelemetryFieldKey{
|
||||
Name: name,
|
||||
FieldDataType: dataType,
|
||||
Materialized: materialized,
|
||||
Name: name,
|
||||
JSONDataType: &dataType,
|
||||
Materialized: materialized,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -242,7 +242,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
|
||||
}{
|
||||
{
|
||||
name: "Simple path not promoted",
|
||||
key: makeKey("user.name", FieldDataTypeString, false),
|
||||
key: makeKey("user.name", String, false),
|
||||
expectedYAML: fmt.Sprintf(`
|
||||
- name: user.name
|
||||
column: %s
|
||||
@@ -255,7 +255,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "Simple path promoted",
|
||||
key: makeKey("user.name", FieldDataTypeString, true),
|
||||
key: makeKey("user.name", String, true),
|
||||
expectedYAML: fmt.Sprintf(`
|
||||
- name: user.name
|
||||
column: %s
|
||||
@@ -276,7 +276,7 @@ func TestPlanJSON_BasicStructure(t *testing.T) {
|
||||
},
|
||||
{
|
||||
name: "Empty path returns error",
|
||||
key: makeKey("", FieldDataTypeString, false),
|
||||
key: makeKey("", String, false),
|
||||
expectErr: true,
|
||||
expectedYAML: "",
|
||||
},
|
||||
@@ -431,7 +431,7 @@ func TestPlanJSON_ArrayPaths(t *testing.T) {
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
key := makeKey(tt.path, FieldDataTypeString, false)
|
||||
key := makeKey(tt.path, String, false)
|
||||
err := key.SetJSONAccessPlan(JSONColumnMetadata{
|
||||
BaseColumn: bodyV2Column,
|
||||
PromotedColumn: bodyPromotedColumn,
|
||||
@@ -450,7 +450,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
|
||||
path := "education[].awards[].type"
|
||||
|
||||
t.Run("Non-promoted plan", func(t *testing.T) {
|
||||
key := makeKey(path, FieldDataTypeString, false)
|
||||
key := makeKey(path, String, false)
|
||||
err := key.SetJSONAccessPlan(JSONColumnMetadata{
|
||||
BaseColumn: bodyV2Column,
|
||||
PromotedColumn: bodyPromotedColumn,
|
||||
@@ -493,7 +493,7 @@ func TestPlanJSON_PromotedVsNonPromoted(t *testing.T) {
|
||||
})
|
||||
|
||||
t.Run("Promoted plan", func(t *testing.T) {
|
||||
key := makeKey(path, FieldDataTypeString, true)
|
||||
key := makeKey(path, String, true)
|
||||
err := key.SetJSONAccessPlan(JSONColumnMetadata{
|
||||
BaseColumn: bodyV2Column,
|
||||
PromotedColumn: bodyPromotedColumn,
|
||||
@@ -666,12 +666,12 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Choose key type based on path; operator does not affect the tree shape asserted here.
|
||||
keyType := FieldDataTypeString
|
||||
keyType := String
|
||||
switch tt.path {
|
||||
case "education":
|
||||
keyType = FieldDataTypeArrayJSON
|
||||
keyType = ArrayJSON
|
||||
case "education[].type":
|
||||
keyType = FieldDataTypeString
|
||||
keyType = String
|
||||
}
|
||||
key := makeKey(tt.path, keyType, false)
|
||||
err := key.SetJSONAccessPlan(JSONColumnMetadata{
|
||||
@@ -692,7 +692,7 @@ func TestPlanJSON_EdgeCases(t *testing.T) {
|
||||
func TestPlanJSON_TreeStructure(t *testing.T) {
|
||||
types, _ := TestJSONTypeSet()
|
||||
path := "education[].awards[].participated[].team[].branch"
|
||||
key := makeKey(path, FieldDataTypeString, false)
|
||||
key := makeKey(path, String, false)
|
||||
err := key.SetJSONAccessPlan(JSONColumnMetadata{
|
||||
BaseColumn: bodyV2Column,
|
||||
PromotedColumn: bodyPromotedColumn,
|
||||
|
||||
@@ -46,6 +46,14 @@ var MappingStringToJSONDataType = map[string]JSONDataType{
|
||||
"Array(JSON)": ArrayJSON,
|
||||
}
|
||||
|
||||
var ScalerTypeToArrayType = map[JSONDataType]JSONDataType{
|
||||
String: ArrayString,
|
||||
Int64: ArrayInt64,
|
||||
Float64: ArrayFloat64,
|
||||
Bool: ArrayBool,
|
||||
Dynamic: ArrayDynamic,
|
||||
}
|
||||
|
||||
var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
|
||||
FieldDataTypeString: String,
|
||||
FieldDataTypeInt64: Int64,
|
||||
@@ -55,8 +63,18 @@ var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
|
||||
FieldDataTypeArrayString: ArrayString,
|
||||
FieldDataTypeArrayInt64: ArrayInt64,
|
||||
FieldDataTypeArrayFloat64: ArrayFloat64,
|
||||
FieldDataTypeArrayNumber: ArrayFloat64,
|
||||
FieldDataTypeArrayBool: ArrayBool,
|
||||
FieldDataTypeArrayDynamic: ArrayDynamic,
|
||||
FieldDataTypeArrayJSON: ArrayJSON,
|
||||
}
|
||||
|
||||
var MappingJSONDataTypeToFieldDataType = map[JSONDataType]FieldDataType{
|
||||
String: FieldDataTypeString,
|
||||
Int64: FieldDataTypeInt64,
|
||||
Float64: FieldDataTypeFloat64,
|
||||
Bool: FieldDataTypeBool,
|
||||
ArrayString: FieldDataTypeArrayString,
|
||||
ArrayInt64: FieldDataTypeArrayInt64,
|
||||
ArrayFloat64: FieldDataTypeArrayFloat64,
|
||||
ArrayBool: FieldDataTypeArrayBool,
|
||||
ArrayDynamic: FieldDataTypeArrayDynamic,
|
||||
ArrayJSON: FieldDataTypeArrayObject,
|
||||
}
|
||||
|
||||
@@ -4,69 +4,69 @@ package telemetrytypes
|
||||
// Test JSON Type Set Data Setup
|
||||
// ============================================================================
|
||||
|
||||
// TestJSONTypeSet returns a map of path->field data types for testing.
|
||||
// TestJSONTypeSet returns a map of path->types for testing.
|
||||
// This represents the type information available in the test JSON structure.
|
||||
func TestJSONTypeSet() (map[string][]FieldDataType, MetadataStore) {
|
||||
types := map[string][]FieldDataType{
|
||||
func TestJSONTypeSet() (map[string][]JSONDataType, MetadataStore) {
|
||||
types := map[string][]JSONDataType{
|
||||
|
||||
// ── user (primitives) ─────────────────────────────────────────────
|
||||
"user.name": {FieldDataTypeString},
|
||||
"user.permissions": {FieldDataTypeArrayString},
|
||||
"user.age": {FieldDataTypeInt64, FieldDataTypeString}, // Int64/String ambiguity
|
||||
"user.height": {FieldDataTypeFloat64},
|
||||
"user.active": {FieldDataTypeBool}, // Bool — not IndexSupported
|
||||
"user.name": {String},
|
||||
"user.permissions": {ArrayString},
|
||||
"user.age": {Int64, String}, // Int64/String ambiguity
|
||||
"user.height": {Float64},
|
||||
"user.active": {Bool}, // Bool — not IndexSupported
|
||||
|
||||
// Deeper non-array nesting (a.b.c — no array hops)
|
||||
"user.address.zip": {FieldDataTypeInt64},
|
||||
"user.address.zip": {Int64},
|
||||
|
||||
// ── education[] ───────────────────────────────────────────────────
|
||||
// Pattern: x[].y
|
||||
"education": {FieldDataTypeArrayJSON},
|
||||
"education[].name": {FieldDataTypeString},
|
||||
"education[].type": {FieldDataTypeString, FieldDataTypeInt64},
|
||||
"education[].year": {FieldDataTypeInt64},
|
||||
"education[].scores": {FieldDataTypeArrayInt64},
|
||||
"education[].parameters": {FieldDataTypeArrayFloat64, FieldDataTypeArrayDynamic},
|
||||
"education": {ArrayJSON},
|
||||
"education[].name": {String},
|
||||
"education[].type": {String, Int64},
|
||||
"education[].year": {Int64},
|
||||
"education[].scores": {ArrayInt64},
|
||||
"education[].parameters": {ArrayFloat64, ArrayDynamic},
|
||||
|
||||
// Pattern: x[].y[]
|
||||
"education[].awards": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
|
||||
"education[].awards": {ArrayDynamic, ArrayJSON},
|
||||
|
||||
// Pattern: x[].y[].z
|
||||
"education[].awards[].name": {FieldDataTypeString},
|
||||
"education[].awards[].type": {FieldDataTypeString},
|
||||
"education[].awards[].semester": {FieldDataTypeInt64},
|
||||
"education[].awards[].name": {String},
|
||||
"education[].awards[].type": {String},
|
||||
"education[].awards[].semester": {Int64},
|
||||
|
||||
// Pattern: x[].y[].z[]
|
||||
"education[].awards[].participated": {FieldDataTypeArrayDynamic, FieldDataTypeArrayJSON},
|
||||
"education[].awards[].participated": {ArrayDynamic, ArrayJSON},
|
||||
|
||||
// Pattern: x[].y[].z[].w
|
||||
"education[].awards[].participated[].members": {FieldDataTypeArrayString},
|
||||
"education[].awards[].participated[].members": {ArrayString},
|
||||
|
||||
// Pattern: x[].y[].z[].w[]
|
||||
"education[].awards[].participated[].team": {FieldDataTypeArrayJSON},
|
||||
"education[].awards[].participated[].team": {ArrayJSON},
|
||||
|
||||
// Pattern: x[].y[].z[].w[].v
|
||||
"education[].awards[].participated[].team[].branch": {FieldDataTypeString},
|
||||
"education[].awards[].participated[].team[].branch": {String},
|
||||
|
||||
// ── interests[] ───────────────────────────────────────────────────
|
||||
"interests": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities[].reviews": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities[].reviews[].entries": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions": {FieldDataTypeArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {FieldDataTypeString},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {FieldDataTypeArrayInt64, FieldDataTypeArrayString},
|
||||
"http-events": {FieldDataTypeArrayJSON},
|
||||
"http-events[].request-info.host": {FieldDataTypeString},
|
||||
"ids": {FieldDataTypeArrayDynamic},
|
||||
"interests": {ArrayJSON},
|
||||
"interests[].entities": {ArrayJSON},
|
||||
"interests[].entities[].reviews": {ArrayJSON},
|
||||
"interests[].entities[].reviews[].entries": {ArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata": {ArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions": {ArrayJSON},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions[].name": {String},
|
||||
"interests[].entities[].reviews[].entries[].metadata[].positions[].ratings": {ArrayInt64, ArrayString},
|
||||
"http-events": {ArrayJSON},
|
||||
"http-events[].request-info.host": {String},
|
||||
"ids": {ArrayDynamic},
|
||||
|
||||
// ── top-level primitives ──────────────────────────────────────────
|
||||
"message": {FieldDataTypeString},
|
||||
"http-status": {FieldDataTypeInt64, FieldDataTypeString}, // hyphen in root key, ambiguous
|
||||
"message": {String},
|
||||
"http-status": {Int64, String}, // hyphen in root key, ambiguous
|
||||
|
||||
// ── top-level nested objects (no array hops) ───────────────────────
|
||||
"response.time-taken": {FieldDataTypeFloat64}, // hyphen inside nested key
|
||||
"response.time-taken": {Float64}, // hyphen inside nested key
|
||||
}
|
||||
|
||||
return types, nil
|
||||
|
||||
@@ -23,6 +23,7 @@ pytest_plugins = [
|
||||
"fixtures.notification_channel",
|
||||
"fixtures.alerts",
|
||||
"fixtures.cloudintegrations",
|
||||
"fixtures.jsontypeexporter",
|
||||
]
|
||||
|
||||
|
||||
|
||||
437
tests/integration/fixtures/jsontypeexporter.py
Normal file
437
tests/integration/fixtures/jsontypeexporter.py
Normal file
@@ -0,0 +1,437 @@
|
||||
"""
|
||||
Simpler version of jsontypeexporter for test fixtures.
|
||||
This exports JSON type metadata to the path_types table by parsing JSON bodies
|
||||
and extracting all paths with their types, similar to how the real jsontypeexporter works.
|
||||
"""
|
||||
|
||||
import datetime
|
||||
import json
|
||||
from abc import ABC
|
||||
from typing import (
|
||||
TYPE_CHECKING,
|
||||
Any,
|
||||
Callable,
|
||||
Dict,
|
||||
Generator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Union,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from fixtures import types
|
||||
|
||||
if TYPE_CHECKING:
|
||||
pass
|
||||
|
||||
|
||||
class JSONPathType(ABC):
|
||||
"""Represents a JSON path with its type information"""
|
||||
|
||||
path: str
|
||||
type: str
|
||||
last_seen: np.uint64
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
path: str,
|
||||
type: str, # pylint: disable=redefined-builtin
|
||||
last_seen: Optional[datetime.datetime] = None,
|
||||
) -> None:
|
||||
self.path = path
|
||||
self.type = type
|
||||
if last_seen is None:
|
||||
last_seen = datetime.datetime.now()
|
||||
self.last_seen = np.uint64(int(last_seen.timestamp() * 1e9))
|
||||
|
||||
def np_arr(self) -> np.array:
|
||||
"""Return path type data as numpy array for database insertion"""
|
||||
return np.array([self.path, self.type, self.last_seen])
|
||||
|
||||
|
||||
# Constants matching jsontypeexporter
|
||||
ARRAY_SEPARATOR = "[]." # Used in paths like "education[].name"
|
||||
ARRAY_SUFFIX = "[]" # Used when traversing into array element objects
|
||||
|
||||
|
||||
def _infer_array_type_from_type_strings(types: List[str]) -> Optional[str]:
|
||||
"""
|
||||
Infer array type from a list of pre-classified type strings.
|
||||
Matches jsontypeexporter's inferArrayMask logic (v0.144.2+).
|
||||
|
||||
Type strings are: "JSON", "String", "Bool", "Float64", "Int64"
|
||||
|
||||
SuperTyping rules (matching Go inferArrayMask):
|
||||
- JSON alone → Array(JSON)
|
||||
- JSON + any primitive → Array(Dynamic)
|
||||
- String alone → Array(Nullable(String)); String + other → Array(Dynamic)
|
||||
- Float64 wins over Int64 and Bool
|
||||
- Int64 wins over Bool
|
||||
- Bool alone → Array(Nullable(Bool))
|
||||
"""
|
||||
if len(types) == 0:
|
||||
return None
|
||||
|
||||
unique = set(types)
|
||||
|
||||
has_json = "JSON" in unique
|
||||
# hasPrimitive mirrors Go: (hasJSON && len(unique) > 1) || (!hasJSON && len(unique) > 0)
|
||||
has_primitive = (has_json and len(unique) > 1) or (not has_json and len(unique) > 0)
|
||||
|
||||
if has_json:
|
||||
if not has_primitive:
|
||||
return "Array(JSON)"
|
||||
return "Array(Dynamic)"
|
||||
|
||||
# ---- Primitive Type Resolution (Float > Int > Bool) ----
|
||||
if "String" in unique:
|
||||
if len(unique) > 1:
|
||||
return "Array(Dynamic)"
|
||||
return "Array(Nullable(String))"
|
||||
|
||||
if "Float64" in unique:
|
||||
return "Array(Nullable(Float64))"
|
||||
if "Int64" in unique:
|
||||
return "Array(Nullable(Int64))"
|
||||
if "Bool" in unique:
|
||||
return "Array(Nullable(Bool))"
|
||||
|
||||
return "Array(Dynamic)"
|
||||
|
||||
|
||||
def _infer_array_type(elements: List[Any]) -> Optional[str]:
|
||||
"""
|
||||
Infer array type from raw Python list elements.
|
||||
Classifies each element then delegates to _infer_array_type_from_type_strings.
|
||||
"""
|
||||
if len(elements) == 0:
|
||||
return None
|
||||
|
||||
types = []
|
||||
for elem in elements:
|
||||
if elem is None:
|
||||
continue
|
||||
if isinstance(elem, dict):
|
||||
types.append("JSON")
|
||||
elif isinstance(elem, str):
|
||||
types.append("String")
|
||||
elif isinstance(elem, bool): # must be before int (bool is subclass of int)
|
||||
types.append("Bool")
|
||||
elif isinstance(elem, float):
|
||||
types.append("Float64")
|
||||
elif isinstance(elem, int):
|
||||
types.append("Int64")
|
||||
|
||||
return _infer_array_type_from_type_strings(types)
|
||||
|
||||
|
||||
def _python_type_to_clickhouse_type(value: Any) -> str:
|
||||
"""
|
||||
Convert Python type to ClickHouse JSON type string.
|
||||
Maps Python types to ClickHouse JSON data types.
|
||||
"""
|
||||
if value is None:
|
||||
return "String" # Default for null values
|
||||
|
||||
if isinstance(value, bool):
|
||||
return "Bool"
|
||||
elif isinstance(value, int):
|
||||
return "Int64"
|
||||
elif isinstance(value, float):
|
||||
return "Float64"
|
||||
elif isinstance(value, str):
|
||||
return "String"
|
||||
elif isinstance(value, list):
|
||||
# Use the sophisticated array type inference
|
||||
array_type = _infer_array_type(value)
|
||||
return array_type if array_type else "Array(Dynamic)"
|
||||
elif isinstance(value, dict):
|
||||
return "JSON"
|
||||
else:
|
||||
return "String" # Default fallback
|
||||
|
||||
|
||||
def _extract_json_paths(
|
||||
obj: Any,
|
||||
current_path: str = "",
|
||||
path_types: Optional[Dict[str, Set[str]]] = None,
|
||||
level: int = 0,
|
||||
) -> Dict[str, Set[str]]:
|
||||
"""
|
||||
Recursively extract all paths and their types from a JSON object.
|
||||
Matches jsontypeexporter's analyzePValue logic.
|
||||
|
||||
Args:
|
||||
obj: The JSON object to traverse
|
||||
current_path: Current path being built (e.g., "user.name")
|
||||
path_types: Dictionary mapping paths to sets of types found
|
||||
level: Current nesting level (for depth limiting)
|
||||
|
||||
Returns:
|
||||
Dictionary mapping paths to sets of type strings
|
||||
"""
|
||||
if path_types is None:
|
||||
path_types = {}
|
||||
|
||||
if obj is None:
|
||||
if current_path:
|
||||
if current_path not in path_types:
|
||||
path_types[current_path] = set()
|
||||
path_types[current_path].add("String") # Null defaults to String
|
||||
return path_types
|
||||
|
||||
if isinstance(obj, dict):
|
||||
# For objects, add the object itself and recurse into keys
|
||||
if current_path:
|
||||
if current_path not in path_types:
|
||||
path_types[current_path] = set()
|
||||
path_types[current_path].add("JSON")
|
||||
|
||||
for key, value in obj.items():
|
||||
# Build the path for this key
|
||||
if current_path:
|
||||
new_path = f"{current_path}.{key}"
|
||||
else:
|
||||
new_path = key
|
||||
|
||||
# Recurse into the value
|
||||
_extract_json_paths(value, new_path, path_types, level + 1)
|
||||
|
||||
elif isinstance(obj, list):
|
||||
# Skip empty arrays
|
||||
if len(obj) == 0:
|
||||
return path_types
|
||||
|
||||
# Collect types from array elements (matching Go: types := make([]pcommon.ValueType, 0, s.Len()))
|
||||
types = []
|
||||
|
||||
for item in obj:
|
||||
if isinstance(item, dict):
|
||||
# When traversing into array element objects, use ArraySuffix ([])
|
||||
# This matches: prefix+ArraySuffix in the Go code
|
||||
# Example: if current_path is "education", we use "education[]" to traverse into objects
|
||||
array_prefix = current_path + ARRAY_SUFFIX if current_path else ""
|
||||
for key, value in item.items():
|
||||
if array_prefix:
|
||||
# Use array separator: education[].name
|
||||
array_path = f"{array_prefix}.{key}"
|
||||
else:
|
||||
array_path = key
|
||||
# Recurse without increasing level (matching Go behavior)
|
||||
_extract_json_paths(value, array_path, path_types, level)
|
||||
types.append("JSON")
|
||||
elif isinstance(item, list):
|
||||
# Arrays inside arrays are not supported - skip the whole path
|
||||
# Matching Go: e.logger.Error("arrays inside arrays are not supported!", ...); return nil
|
||||
return path_types
|
||||
elif isinstance(item, str):
|
||||
types.append("String")
|
||||
elif isinstance(item, bool):
|
||||
types.append("Bool")
|
||||
elif isinstance(item, float):
|
||||
types.append("Float64")
|
||||
elif isinstance(item, int):
|
||||
types.append("Int64")
|
||||
|
||||
# Infer array type from collected types (matching Go: if mask := inferArrayMask(types); mask != 0)
|
||||
if len(types) > 0:
|
||||
array_type = _infer_array_type_from_type_strings(types)
|
||||
if array_type and current_path:
|
||||
if current_path not in path_types:
|
||||
path_types[current_path] = set()
|
||||
path_types[current_path].add(array_type)
|
||||
|
||||
else:
|
||||
# Primitive value (string, number, bool)
|
||||
if current_path:
|
||||
if current_path not in path_types:
|
||||
path_types[current_path] = set()
|
||||
obj_type = _python_type_to_clickhouse_type(obj)
|
||||
path_types[current_path].add(obj_type)
|
||||
|
||||
return path_types
|
||||
|
||||
|
||||
def _parse_json_bodies_and_extract_paths(
|
||||
json_bodies: List[str],
|
||||
timestamp: Optional[datetime.datetime] = None,
|
||||
) -> List[JSONPathType]:
|
||||
"""
|
||||
Parse JSON bodies and extract all paths with their types.
|
||||
This mimics the behavior of jsontypeexporter.
|
||||
|
||||
Args:
|
||||
json_bodies: List of JSON body strings to parse
|
||||
timestamp: Timestamp to use for last_seen (defaults to now)
|
||||
|
||||
Returns:
|
||||
List of JSONPathType objects with all discovered paths and types
|
||||
"""
|
||||
if timestamp is None:
|
||||
timestamp = datetime.datetime.now()
|
||||
|
||||
# Aggregate all paths and their types across all JSON bodies
|
||||
all_path_types: Dict[str, Set[str]] = {}
|
||||
|
||||
for json_body in json_bodies:
|
||||
try:
|
||||
parsed = json.loads(json_body)
|
||||
_extract_json_paths(parsed, "", all_path_types, level=0)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
# Skip invalid JSON
|
||||
continue
|
||||
|
||||
# Convert to list of JSONPathType objects
|
||||
# Each path can have multiple types, so we create one JSONPathType per type
|
||||
path_type_objects: List[JSONPathType] = []
|
||||
for path, types_set in all_path_types.items():
|
||||
for type_str in types_set:
|
||||
path_type_objects.append(
|
||||
JSONPathType(path=path, type=type_str, last_seen=timestamp)
|
||||
)
|
||||
|
||||
return path_type_objects
|
||||
|
||||
|
||||
@pytest.fixture(name="export_json_types", scope="function")
|
||||
def export_json_types(
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest, # To access migrator fixture
|
||||
) -> Generator[
|
||||
Callable[[Union[List[JSONPathType], List[str], List[Any]]], None], Any, None
|
||||
]:
|
||||
"""
|
||||
Fixture for exporting JSON type metadata to the path_types table.
|
||||
This is a simpler version of jsontypeexporter for test fixtures.
|
||||
|
||||
The function can accept:
|
||||
1. List of JSONPathType objects (manual specification)
|
||||
2. List of JSON body strings (auto-extract paths)
|
||||
3. List of Logs objects (extract from body_json field)
|
||||
|
||||
Usage examples:
|
||||
# Manual specification
|
||||
export_json_types([
|
||||
JSONPathType(path="user.name", type="String"),
|
||||
JSONPathType(path="user.age", type="Int64"),
|
||||
])
|
||||
|
||||
# Auto-extract from JSON strings
|
||||
export_json_types([
|
||||
'{"user": {"name": "alice", "age": 25}}',
|
||||
'{"user": {"name": "bob", "age": 30}}',
|
||||
])
|
||||
|
||||
# Auto-extract from Logs objects
|
||||
export_json_types(logs_list)
|
||||
"""
|
||||
# Ensure migrator has run to create the table
|
||||
try:
|
||||
request.getfixturevalue("migrator")
|
||||
except Exception:
|
||||
# If migrator fixture is not available, that's okay - table might already exist
|
||||
pass
|
||||
|
||||
def _export_json_types(
|
||||
data: Union[
|
||||
List[JSONPathType], List[str], List[Any]
|
||||
], # List[Logs] but avoiding circular import
|
||||
) -> None:
|
||||
"""
|
||||
Export JSON type metadata to signoz_metadata.distributed_json_path_types table.
|
||||
This table stores path and type information for body JSON fields.
|
||||
"""
|
||||
path_types: List[JSONPathType] = []
|
||||
|
||||
if len(data) == 0:
|
||||
return
|
||||
|
||||
# Determine input type and convert to JSONPathType list
|
||||
first_item = data[0]
|
||||
|
||||
if isinstance(first_item, JSONPathType):
|
||||
# Already JSONPathType objects
|
||||
path_types = data # type: ignore
|
||||
elif isinstance(first_item, str):
|
||||
# List of JSON strings - parse and extract paths
|
||||
path_types = _parse_json_bodies_and_extract_paths(data) # type: ignore
|
||||
else:
|
||||
# Assume it's a list of Logs objects - extract body_v2
|
||||
json_bodies: List[str] = []
|
||||
for log in data: # type: ignore
|
||||
# Try to get body_v2 attribute
|
||||
if hasattr(log, "body_v2") and log.body_v2:
|
||||
json_bodies.append(log.body_v2)
|
||||
elif hasattr(log, "body") and log.body:
|
||||
# Fallback to body if body_v2 not available
|
||||
try:
|
||||
# Try to parse as JSON
|
||||
json.loads(log.body)
|
||||
json_bodies.append(log.body)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
if json_bodies:
|
||||
path_types = _parse_json_bodies_and_extract_paths(json_bodies)
|
||||
|
||||
if len(path_types) == 0:
|
||||
return
|
||||
|
||||
clickhouse.conn.insert(
|
||||
database="signoz_metadata",
|
||||
table="distributed_json_path_types",
|
||||
data=[path_type.np_arr() for path_type in path_types],
|
||||
column_names=[
|
||||
"path",
|
||||
"type",
|
||||
"last_seen",
|
||||
],
|
||||
)
|
||||
|
||||
yield _export_json_types
|
||||
|
||||
# Cleanup - truncate the local table after tests (following pattern from logs fixture)
|
||||
clickhouse.conn.query(
|
||||
f"TRUNCATE TABLE signoz_metadata.json_path_types ON CLUSTER '{clickhouse.env['SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER']}' SYNC"
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(name="export_promoted_paths", scope="function")
|
||||
def export_promoted_paths(
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest, # To access migrator fixture
|
||||
) -> Generator[Callable[[List[str]], None], Any, None]:
|
||||
"""
|
||||
Fixture for exporting promoted JSON paths to the promoted paths table.
|
||||
"""
|
||||
# Ensure migrator has run to create the table
|
||||
try:
|
||||
request.getfixturevalue("migrator")
|
||||
except Exception:
|
||||
# If migrator fixture is not available, that's okay - table might already exist
|
||||
pass
|
||||
|
||||
def _export_promoted_paths(paths: List[str]) -> None:
|
||||
if len(paths) == 0:
|
||||
return
|
||||
|
||||
now_ms = int(datetime.datetime.now().timestamp() * 1000)
|
||||
rows = [(path, now_ms) for path in paths]
|
||||
clickhouse.conn.insert(
|
||||
database="signoz_metadata",
|
||||
table="distributed_json_promoted_paths",
|
||||
data=rows,
|
||||
column_names=[
|
||||
"path",
|
||||
"created_at",
|
||||
],
|
||||
)
|
||||
|
||||
yield _export_promoted_paths
|
||||
|
||||
clickhouse.conn.query(
|
||||
f"TRUNCATE TABLE signoz_metadata.json_promoted_paths ON CLUSTER '{clickhouse.env['SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER']}' SYNC"
|
||||
)
|
||||
@@ -122,6 +122,8 @@ class Logs(ABC):
|
||||
resources: dict[str, Any] = {},
|
||||
attributes: dict[str, Any] = {},
|
||||
body: str = "default body",
|
||||
body_v2: Optional[str] = None,
|
||||
body_promoted: Optional[str] = None,
|
||||
severity_text: str = "INFO",
|
||||
trace_id: str = "",
|
||||
span_id: str = "",
|
||||
@@ -167,6 +169,33 @@ class Logs(ABC):
|
||||
# Set body
|
||||
self.body = body
|
||||
|
||||
# Set body_v2 - if body is JSON, parse and stringify it, otherwise use empty string
|
||||
# ClickHouse accepts String input for JSON column
|
||||
if body_v2 is not None:
|
||||
self.body_v2 = body_v2
|
||||
else:
|
||||
# Try to parse body as JSON; if successful use it directly,
|
||||
# otherwise wrap as {"message": body} matching the normalize operator behavior.
|
||||
try:
|
||||
json.loads(body)
|
||||
self.body_v2 = body
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
self.body_v2 = json.dumps({"message": body})
|
||||
|
||||
# Set body_promoted - must be valid JSON
|
||||
# Tests will explicitly pass promoted column's content, but we validate it
|
||||
if body_promoted is not None:
|
||||
# Validate that it's valid JSON
|
||||
try:
|
||||
json.loads(body_promoted)
|
||||
self.body_promoted = body_promoted
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
# If invalid, default to empty JSON object
|
||||
self.body_promoted = "{}"
|
||||
else:
|
||||
# Default to empty JSON object (valid JSON)
|
||||
self.body_promoted = "{}"
|
||||
|
||||
# Process resources and attributes
|
||||
self.resources_string = {k: str(v) for k, v in resources.items()}
|
||||
self.resource_json = (
|
||||
@@ -326,6 +355,8 @@ class Logs(ABC):
|
||||
self.severity_text,
|
||||
self.severity_number,
|
||||
self.body,
|
||||
self.body_v2,
|
||||
self.body_promoted,
|
||||
self.attributes_string,
|
||||
self.attributes_number,
|
||||
self.attributes_bool,
|
||||
@@ -454,31 +485,53 @@ def insert_logs(
|
||||
data=[resource_key.np_arr() for resource_key in resource_keys],
|
||||
)
|
||||
|
||||
# All columns in insertion order (must match Logs.np_arr() order)
|
||||
all_column_names = [
|
||||
"ts_bucket_start",
|
||||
"resource_fingerprint",
|
||||
"timestamp",
|
||||
"observed_timestamp",
|
||||
"id",
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"trace_flags",
|
||||
"severity_text",
|
||||
"severity_number",
|
||||
"body",
|
||||
"body_v2",
|
||||
"body_promoted",
|
||||
"attributes_string",
|
||||
"attributes_number",
|
||||
"attributes_bool",
|
||||
"resources_string",
|
||||
"scope_name",
|
||||
"scope_version",
|
||||
"scope_string",
|
||||
"resource",
|
||||
]
|
||||
|
||||
# Check if body_v2 column exists (only present when ENABLE_LOGS_MIGRATIONS_V2 migration has run)
|
||||
result = clickhouse.conn.query(
|
||||
"SELECT count() FROM system.columns WHERE database = 'signoz_logs' AND table = 'logs_v2' AND name = 'body_v2'"
|
||||
)
|
||||
has_json_body = result.result_rows[0][0] > 0
|
||||
|
||||
if has_json_body:
|
||||
column_names = all_column_names
|
||||
data = [log.np_arr() for log in logs]
|
||||
else:
|
||||
json_body_cols = {"body_v2", "body_promoted"}
|
||||
keep_indices = [
|
||||
i for i, c in enumerate(all_column_names) if c not in json_body_cols
|
||||
]
|
||||
column_names = [all_column_names[i] for i in keep_indices]
|
||||
data = [log.np_arr()[keep_indices] for log in logs]
|
||||
|
||||
clickhouse.conn.insert(
|
||||
database="signoz_logs",
|
||||
table="distributed_logs_v2",
|
||||
data=[log.np_arr() for log in logs],
|
||||
column_names=[
|
||||
"ts_bucket_start",
|
||||
"resource_fingerprint",
|
||||
"timestamp",
|
||||
"observed_timestamp",
|
||||
"id",
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"trace_flags",
|
||||
"severity_text",
|
||||
"severity_number",
|
||||
"body",
|
||||
"attributes_string",
|
||||
"attributes_number",
|
||||
"attributes_bool",
|
||||
"resources_string",
|
||||
"scope_name",
|
||||
"scope_version",
|
||||
"scope_string",
|
||||
"resource",
|
||||
],
|
||||
data=data,
|
||||
column_names=column_names,
|
||||
)
|
||||
|
||||
yield _insert_logs
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
from typing import Optional
|
||||
|
||||
import docker
|
||||
import pytest
|
||||
from testcontainers.core.container import Network
|
||||
@@ -8,27 +10,32 @@ from fixtures.logger import setup_logger
|
||||
logger = setup_logger(__name__)
|
||||
|
||||
|
||||
@pytest.fixture(name="migrator", scope="package")
|
||||
def migrator(
|
||||
def create_migrator(
|
||||
network: Network,
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest,
|
||||
pytestconfig: pytest.Config,
|
||||
cache_key: str = "migrator",
|
||||
env_overrides: Optional[dict] = None,
|
||||
) -> types.Operation:
|
||||
"""
|
||||
Package-scoped fixture for running schema migrations.
|
||||
Factory function for running schema migrations.
|
||||
Accepts optional env_overrides to customize the migrator environment.
|
||||
"""
|
||||
|
||||
def create() -> None:
|
||||
version = request.config.getoption("--schema-migrator-version")
|
||||
client = docker.from_env()
|
||||
|
||||
environment = dict(env_overrides) if env_overrides else {}
|
||||
|
||||
container = client.containers.run(
|
||||
image=f"signoz/signoz-schema-migrator:{version}",
|
||||
command=f"sync --replication=true --cluster-name=cluster --up= --dsn={clickhouse.env["SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN"]}",
|
||||
detach=True,
|
||||
auto_remove=False,
|
||||
network=network.id,
|
||||
environment=environment,
|
||||
)
|
||||
|
||||
result = container.wait()
|
||||
@@ -47,6 +54,7 @@ def migrator(
|
||||
detach=True,
|
||||
auto_remove=False,
|
||||
network=network.id,
|
||||
environment=environment,
|
||||
)
|
||||
|
||||
result = container.wait()
|
||||
@@ -59,7 +67,7 @@ def migrator(
|
||||
|
||||
container.remove()
|
||||
|
||||
return types.Operation(name="migrator")
|
||||
return types.Operation(name=cache_key)
|
||||
|
||||
def delete(_: types.Operation) -> None:
|
||||
pass
|
||||
@@ -70,9 +78,27 @@ def migrator(
|
||||
return dev.wrap(
|
||||
request,
|
||||
pytestconfig,
|
||||
"migrator",
|
||||
cache_key,
|
||||
lambda: types.Operation(name=""),
|
||||
create,
|
||||
delete,
|
||||
restore,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(name="migrator", scope="package")
|
||||
def migrator(
|
||||
network: Network,
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest,
|
||||
pytestconfig: pytest.Config,
|
||||
) -> types.Operation:
|
||||
"""
|
||||
Package-scoped fixture for running schema migrations.
|
||||
"""
|
||||
return create_migrator(
|
||||
network=network,
|
||||
clickhouse=clickhouse,
|
||||
request=request,
|
||||
pytestconfig=pytestconfig,
|
||||
)
|
||||
|
||||
1181
tests/integration/src/querier_json_body/01_logs_json_body_new_qb.py
Normal file
1181
tests/integration/src/querier_json_body/01_logs_json_body_new_qb.py
Normal file
File diff suppressed because it is too large
Load Diff
0
tests/integration/src/querier_json_body/__init__.py
Normal file
0
tests/integration/src/querier_json_body/__init__.py
Normal file
70
tests/integration/src/querier_json_body/conftest.py
Normal file
70
tests/integration/src/querier_json_body/conftest.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import pytest
|
||||
from testcontainers.core.container import Network
|
||||
|
||||
from fixtures import types
|
||||
from fixtures.migrator import create_migrator
|
||||
from fixtures.signoz import create_signoz
|
||||
|
||||
UNSUPPORTED_CLICKHOUSE_VERSIONS = {"25.5.6"}
|
||||
|
||||
|
||||
def pytest_collection_modifyitems(
|
||||
config: pytest.Config, items: list[pytest.Item]
|
||||
) -> None:
|
||||
version = config.getoption("--clickhouse-version")
|
||||
if version in UNSUPPORTED_CLICKHOUSE_VERSIONS:
|
||||
skip = pytest.mark.skip(
|
||||
reason=f"JSON body QB tests require ClickHouse > {version}"
|
||||
)
|
||||
for item in items:
|
||||
item.add_marker(skip)
|
||||
|
||||
|
||||
@pytest.fixture(name="migrator", scope="package")
|
||||
def migrator_json(
|
||||
network: Network,
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest,
|
||||
pytestconfig: pytest.Config,
|
||||
) -> types.Operation:
|
||||
"""
|
||||
Package-scoped migrator with ENABLE_LOGS_MIGRATIONS_V2=1.
|
||||
"""
|
||||
return create_migrator(
|
||||
network=network,
|
||||
clickhouse=clickhouse,
|
||||
request=request,
|
||||
pytestconfig=pytestconfig,
|
||||
cache_key="migrator-json-body",
|
||||
env_overrides={
|
||||
"ENABLE_LOGS_MIGRATIONS_V2": "1",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(name="signoz", scope="package")
|
||||
def signoz_json_body(
|
||||
network: Network,
|
||||
zeus: types.TestContainerDocker,
|
||||
gateway: types.TestContainerDocker,
|
||||
sqlstore: types.TestContainerSQL,
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest,
|
||||
pytestconfig: pytest.Config,
|
||||
) -> types.SigNoz:
|
||||
"""
|
||||
Package-scoped fixture for SigNoz with BODY_JSON_QUERY_ENABLED=true.
|
||||
"""
|
||||
return create_signoz(
|
||||
network=network,
|
||||
zeus=zeus,
|
||||
gateway=gateway,
|
||||
sqlstore=sqlstore,
|
||||
clickhouse=clickhouse,
|
||||
request=request,
|
||||
pytestconfig=pytestconfig,
|
||||
cache_key="signoz-json-body",
|
||||
env_overrides={
|
||||
"BODY_JSON_QUERY_ENABLED": "true",
|
||||
},
|
||||
)
|
||||
Reference in New Issue
Block a user