Mirror of https://github.com/SigNoz/signoz.git (synced 2026-04-23 04:10:29 +01:00)

Compare commits: refactor/r… ... feat/json-… (99 commits)
Commits: e39e93b01a, 8fbc490673, 88c53c456d, 8cda4644fb, c3a73c6626, 6be50ec7bc, 98cefa1975, 43bbde1887, d91c870fb5, 5a441d3367, ddf0975cf6, 3f8a5af14c, 65ce61c0dd, 1350dd8226, 14ecd0f8db, 7eccbc96b6, 0a595e1b71, e8ed7592e6, eee7788108, 0a37aa0bd1, 7d7307a6eb, 91beeb4949, 9f8c3466b1, 701323c822, cc193c9be5, 512dc579b1, d904953afe, 9690c06cb2, 8c44925a67, 1ea35a19d7, c27db82794, 5d9685ad9e, 8d0cc2d99d, 9643fae27a, 750394a73f, 19a65e80d8, bbef04352e, 0758ff133b, 74dceb844b, 0d0ebe8fe7, b35a6213a6, 9cef109158, bb308c263f, 67e1b82adb, 1e8c0f19f5, c027181935, 7122fb8b54, 67830c8a16, 096eee6435, 30f5f2f2f2, 52adb84461, 63b7f15d0e, 6d3c88ed21, fc6a67aec1, 9f41499047, 9cd554d293, 415387edfc, fa1bc3db9b, 6c6dad8a66, 7403f86773, 9176ef0589, 5c2a338189, 704bab23cf, 371da26b3c, 97fbfbdc13, 4b112988ef, a47ecf3907, e4a78cf556, b6adecc294, 40333a5fee, 4af6a9abae, 55e892dad3, 181116308f, eaa678910b, e994caeb02, 10840f8495, 1fcd3adfc8, 3e14b26b00, b30bfa6371, e7f4a04b36, 0687634da3, 7e7732243e, 2f952e402f, a12febca4a, cb71c9c3f7, 1cd4ce6509, 9299c8ab18, 24749de269, 39098ec3f4, fe554f5c94, 8a60a041a6, 541f19c34a, 010db03d6e, 5408acbd8c, 0de6c85f81, 69ec24fa05, 539d732b65, 843d5fb199, fabdfb8cc1
.github/workflows/integrationci.yaml (vendored, 3 changed lines)

@@ -52,6 +52,7 @@ jobs:
           - ingestionkeys
           - rootuser
           - serviceaccount
+          - querier_json_body
         sqlstore-provider:
           - postgres
           - sqlite
@@ -61,7 +62,7 @@ jobs:
           - 25.5.6
           - 25.12.5
         schema-migrator-version:
-          - v0.142.0
+          - v0.144.3
         postgres-version:
           - 15
         if: |
@@ -2705,8 +2705,8 @@ components:
       type: object
     PromotetypesWrappedIndex:
       properties:
-        column_type:
-          type: string
+        fieldDataType:
+          $ref: '#/components/schemas/TelemetrytypesFieldDataType'
         granularity:
           type: integer
         type:

@@ -3469,10 +3469,7 @@ export interface PromotetypesPromotePathDTO {
 }

 export interface PromotetypesWrappedIndexDTO {
-  /**
-   * @type string
-   */
-  column_type?: string;
+  fieldDataType?: TelemetrytypesFieldDataTypeDTO;
   /**
    * @type integer
    */
go.mod (2 changed lines)

@@ -8,7 +8,7 @@ require (
 	github.com/ClickHouse/clickhouse-go/v2 v2.40.1
 	github.com/DATA-DOG/go-sqlmock v1.5.2
 	github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd
-	github.com/SigNoz/signoz-otel-collector v0.144.3-rc.4
+	github.com/SigNoz/signoz-otel-collector v0.144.3
 	github.com/antlr4-go/antlr/v4 v4.13.1
 	github.com/antonmedv/expr v1.15.3
 	github.com/cespare/xxhash/v2 v2.3.0
go.sum (4 changed lines)

@@ -108,8 +108,8 @@ github.com/SigNoz/expr v1.17.7-beta h1:FyZkleM5dTQ0O6muQfwGpoH5A2ohmN/XTasRCO72g
 github.com/SigNoz/expr v1.17.7-beta/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40VO/1IT4=
 github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd h1:Bk43AsDYe0fhkbj57eGXx8H3ZJ4zhmQXBnrW523ktj8=
 github.com/SigNoz/govaluate v0.0.0-20240203125216-988004ccc7fd/go.mod h1:nxRcH/OEdM8QxzH37xkGzomr1O0JpYBRS6pwjsWW6Pc=
-github.com/SigNoz/signoz-otel-collector v0.144.3-rc.4 h1:EskJkEMfuuIyArWhV8SleDV/fuKxiaEGTXrCZIFqDT4=
-github.com/SigNoz/signoz-otel-collector v0.144.3-rc.4/go.mod h1:9pLVpcIQvUT88ZiNnZN/MI+XLruvwC+Xm2UzPmGjNfA=
+github.com/SigNoz/signoz-otel-collector v0.144.3 h1:/7PPIqIpPsaWtrgnfHam2XVYP41ZlgEKLHzQO8oVxcA=
+github.com/SigNoz/signoz-otel-collector v0.144.3/go.mod h1:9pLVpcIQvUT88ZiNnZN/MI+XLruvwC+Xm2UzPmGjNfA=
 github.com/Yiling-J/theine-go v0.6.2 h1:1GeoXeQ0O0AUkiwj2S9Jc0Mzx+hpqzmqsJ4kIC4M9AY=
 github.com/Yiling-J/theine-go v0.6.2/go.mod h1:08QpMa5JZ2pKN+UJCRrCasWYO1IKCdl54Xa836rpmDU=
 github.com/afex/hystrix-go v0.0.0-20180502004556-fa1af6a1f4f5/go.mod h1:SkGFH1ia65gfNATL8TAiHDNxPzPdmEL5uirI2Uyuz6c=
@@ -32,27 +32,17 @@ func NewModule(metadataStore telemetrytypes.MetadataStore, telemetrystore teleme
 }

 func (m *module) ListPromotedAndIndexedPaths(ctx context.Context) ([]promotetypes.PromotePath, error) {
-	logsIndexes, err := m.metadataStore.ListLogsJSONIndexes(ctx)
+	indexes, err := m.metadataStore.ListLogsJSONIndexes(ctx)
 	if err != nil {
 		return nil, err
 	}
-	// Flatten the map values (which are slices) into a single slice
-	indexes := slices.Concat(slices.Collect(maps.Values(logsIndexes))...)

 	aggr := map[string][]promotetypes.WrappedIndex{}
 	for _, index := range indexes {
-		path, columnType, err := schemamigrator.UnfoldJSONSubColumnIndexExpr(index.Expression)
-		if err != nil {
-			return nil, err
-		}
-
-		// clean backticks from the path
-		path = strings.ReplaceAll(path, "`", "")
-
-		aggr[path] = append(aggr[path], promotetypes.WrappedIndex{
-			ColumnType:  columnType,
-			Type:        index.Type,
-			Granularity: index.Granularity,
+		aggr[index.Name] = append(aggr[index.Name], promotetypes.WrappedIndex{
+			FieldDataType: index.FieldDataType,
+			Type:          index.IndexType,
+			Granularity:   index.Granularity,
 		})
 	}
 	promotedPaths, err := m.listPromotedPaths(ctx)
@@ -204,7 +204,7 @@ func AdjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemet
 	// Downstream query builder should handle multiple matching keys with their own metadata
 	// and not rely on this function to do so.
 	materialized := true
-	indexes := []telemetrytypes.JSONDataTypeIndex{}
+	indexes := []telemetrytypes.TelemetryFieldKeySkipIndex{}
 	fieldContextsSeen := map[telemetrytypes.FieldContext]bool{}
 	dataTypesSeen := map[telemetrytypes.FieldDataType]bool{}
 	for _, matchingKey := range matchingKeys {
@@ -195,8 +195,8 @@ func (c *jsonConditionBuilder) buildPrimitiveTerminalCondition(node *telemetryty
 	// the field genuinely holds the empty/zero value.
 	//
 	// Note: indexing is also skipped for Array Nested fields because they cannot be indexed.
-	indexed := slices.ContainsFunc(node.TerminalConfig.Key.Indexes, func(index telemetrytypes.JSONDataTypeIndex) bool {
-		return index.Type == node.TerminalConfig.ElemType
+	indexed := slices.ContainsFunc(node.TerminalConfig.Key.Indexes, func(index telemetrytypes.TelemetryFieldKeySkipIndex) bool {
+		return telemetrytypes.MappingFieldDataTypeToJSONDataType[index.FieldDataType] == node.TerminalConfig.ElemType
 	})
 	isExistsCheck := operator == qbtypes.FilterOperatorExists || operator == qbtypes.FilterOperatorNotExists
 	if node.TerminalConfig.ElemType.IndexSupported && indexed && !isExistsCheck {
@@ -1127,9 +1127,12 @@ func buildTestTelemetryMetadataStore(t *testing.T, addIndexes bool) *telemetryty
 		return entry.Path == path && entry.Type == jsonType
 	})
 	if idx >= 0 {
-		key.Indexes = append(key.Indexes, telemetrytypes.JSONDataTypeIndex{
-			Type:             jsonType,
-			ColumnExpression: schemamigrator.JSONSubColumnIndexExpr(LogsV2BodyV2Column, path, jsonType.StringValue()),
+		key.Indexes = append(key.Indexes, telemetrytypes.TelemetryFieldKeySkipIndex{
+			Name:            path,
+			FieldContext:    telemetrytypes.FieldContextBody,
+			FieldDataType:   fdt,
+			BaseColumn:      LogsV2BodyV2Column,
+			IndexExpression: schemamigrator.JSONSubColumnIndexExpr(LogsV2BodyV2Column, path, jsonType.StringValue()),
 		})
 	}
 }
@@ -106,7 +106,7 @@ func (t *telemetryMetaStore) buildJSONPlans(keys []*telemetrytypes.TelemetryFiel
 	return nil
 }

-func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...string) (map[string][]telemetrytypes.JSONDataTypeIndex, error) {
+func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...string) (map[string][]telemetrytypes.TelemetryFieldKeySkipIndex, error) {
 	filteredPaths := []string{}
 	for _, path := range paths {
 		// skip array paths; since they don't have any indexes
@@ -116,47 +116,22 @@ func (t *telemetryMetaStore) getJSONPathIndexes(ctx context.Context, paths ...st
 		filteredPaths = append(filteredPaths, path)
 	}
 	if len(filteredPaths) == 0 {
-		return make(map[string][]telemetrytypes.JSONDataTypeIndex), nil
+		return make(map[string][]telemetrytypes.TelemetryFieldKeySkipIndex), nil
 	}

 	// list indexes for the paths
-	indexesMap, err := t.ListLogsJSONIndexes(ctx, filteredPaths...)
+	indexes, err := t.ListLogsJSONIndexes(ctx, filteredPaths...)
 	if err != nil {
 		return nil, errors.WrapInternalf(err, CodeFailLoadLogsJSONIndexes, "failed to list JSON path indexes")
 	}

-	// build a set of indexes
-	cleanIndexes := make(map[string][]telemetrytypes.JSONDataTypeIndex)
-	for path, indexes := range indexesMap {
-		for _, index := range indexes {
-			columnExpr, columnType, err := schemamigrator.UnfoldJSONSubColumnIndexExpr(index.Expression)
-			if err != nil {
-				return nil, errors.WrapInternalf(err, CodeFailLoadLogsJSONIndexes, "failed to unfold JSON sub column index expression: %s", index.Expression)
-			}
-
-			jsonDataType, found := telemetrytypes.MappingStringToJSONDataType[columnType]
-			if !found {
-				t.logger.ErrorContext(ctx, "failed to map column type to JSON data type", slog.String("column_type", columnType), slog.String("column_expr", columnExpr))
-				continue
-			}
-
-			if jsonDataType == telemetrytypes.String {
-				cleanIndexes[path] = append(cleanIndexes[path], telemetrytypes.JSONDataTypeIndex{
-					Type:             telemetrytypes.String,
-					ColumnExpression: columnExpr,
-					IndexExpression:  index.Expression,
-				})
-			} else if strings.HasPrefix(index.Type, "minmax") {
-				cleanIndexes[path] = append(cleanIndexes[path], telemetrytypes.JSONDataTypeIndex{
-					Type:             jsonDataType,
-					ColumnExpression: columnExpr,
-					IndexExpression:  index.Expression,
-				})
-			}
-		}
-	}
+	fieldPathToIndexes := make(map[string][]telemetrytypes.TelemetryFieldKeySkipIndex)
+	for _, index := range indexes {
+		fieldPathToIndexes[index.Name] = append(fieldPathToIndexes[index.Name], index)
+	}

-	return cleanIndexes, nil
+	return fieldPathToIndexes, nil
 }

 func buildListLogsJSONIndexesQuery(cluster string, filters ...string) (string, []any) {
@@ -173,14 +148,15 @@ func buildListLogsJSONIndexesQuery(cluster string, filters ...string) (string, [

 	filterExprs := []string{}
 	for _, filter := range filters {
-		filterExprs = append(filterExprs, sb.ILike("expr", fmt.Sprintf("%%%s%%", querybuilder.FormatValueForContains(filter))))
+		// Strip backticks from the stored expression, because paths coming from metadata don't contain backticks.
+		filterExprs = append(filterExprs, sb.ILike("replaceAll(expr, '`', '')", fmt.Sprintf("%%%s%%", querybuilder.FormatValueForContains(filter))))
 	}
 	sb.Where(sb.Or(filterExprs...))

 	return sb.BuildWithFlavor(sqlbuilder.ClickHouse)
 }

-func (t *telemetryMetaStore) ListLogsJSONIndexes(ctx context.Context, filters ...string) (map[string][]schemamigrator.Index, error) {
+func (t *telemetryMetaStore) ListLogsJSONIndexes(ctx context.Context, filters ...string) ([]telemetrytypes.TelemetryFieldKeySkipIndex, error) {
 	ctx = withTelemetryContext(ctx, "ListLogsJSONIndexes")
 	query, args := buildListLogsJSONIndexesQuery(t.telemetrystore.Cluster(), filters...)
 	rows, err := t.telemetrystore.ClickhouseDB().Query(ctx, query, args...)
@@ -189,7 +165,7 @@ func (t *telemetryMetaStore) ListLogsJSONIndexes(ctx context.Context, filters ..
 	}
 	defer rows.Close()

-	indexes := make(map[string][]schemamigrator.Index)
+	indexes := []telemetrytypes.TelemetryFieldKeySkipIndex{}
 	for rows.Next() {
 		var name string
 		var typeFull string
@@ -198,11 +174,39 @@ func (t *telemetryMetaStore) ListLogsJSONIndexes(ctx context.Context, filters ..
 		if err := rows.Scan(&name, &typeFull, &expr, &granularity); err != nil {
 			return nil, errors.WrapInternalf(err, CodeFailLoadLogsJSONIndexes, "failed to scan string indexed column")
 		}
-		indexes[name] = append(indexes[name], schemamigrator.Index{
-			Name:        name,
-			Type:        typeFull,
-			Expression:  expr,
-			Granularity: int(granularity),
+
+		columnExpr, columnType, err := schemamigrator.UnfoldJSONSubColumnIndexExpr(expr)
+		if err != nil {
+			return nil, errors.WrapInternalf(err, CodeFailLoadLogsJSONIndexes, "failed to unfold JSON sub column index expression: %s", expr)
+		}
+
+		fdt, found := telemetrytypes.MappingJSONDataTypeToFieldDataType[columnType]
+		if !found {
+			t.logger.ErrorContext(ctx, "failed to map JSON data type to field data type", slog.String("column_type", columnType), slog.String("column_expr", columnExpr))
+			continue
+		}
+
+		baseColumn := ""
+		fieldName := ""
+		switch {
+		case strings.HasPrefix(columnExpr, telemetrylogs.BodyV2ColumnPrefix):
+			baseColumn = telemetrylogs.BodyV2ColumnPrefix
+			fieldName = strings.TrimPrefix(columnExpr, telemetrylogs.BodyV2ColumnPrefix)
+		case strings.HasPrefix(columnExpr, telemetrylogs.BodyPromotedColumnPrefix):
+			baseColumn = telemetrylogs.BodyPromotedColumnPrefix
+			fieldName = strings.TrimPrefix(columnExpr, telemetrylogs.BodyPromotedColumnPrefix)
+		}
+		fieldName = strings.ReplaceAll(fieldName, "`", "")
+
+		indexes = append(indexes, telemetrytypes.TelemetryFieldKeySkipIndex{
+			Name:            fieldName,
+			FieldContext:    telemetrytypes.FieldContextBody,
+			FieldDataType:   fdt,
+			BaseColumn:      baseColumn,
+			IndexName:       name,
+			IndexType:       typeFull,
+			IndexExpression: expr,
+			Granularity:     int(granularity),
 		})
 	}

@@ -36,7 +36,7 @@ func TestBuildListLogsJSONIndexesQuery(t *testing.T) {
 			cluster: "test-cluster",
 			filters: []string{"foo", "bar"},
 			expectedSQL: "SELECT name, type_full, expr, granularity FROM clusterAllReplicas('test-cluster', system.data_skipping_indices) " +
-				"WHERE database = ? AND table = ? AND (LOWER(expr) LIKE LOWER(?) OR LOWER(expr) LIKE LOWER(?)) AND (LOWER(expr) LIKE LOWER(?) OR LOWER(expr) LIKE LOWER(?))",
+				"WHERE database = ? AND table = ? AND (LOWER(expr) LIKE LOWER(?) OR LOWER(expr) LIKE LOWER(?)) AND (LOWER(replaceAll(expr, '`', '')) LIKE LOWER(?) OR LOWER(replaceAll(expr, '`', '')) LIKE LOWER(?))",
 			expectedArgs: []any{
 				telemetrylogs.DBName,
 				telemetrylogs.LogsV2LocalTableName,
@@ -10,10 +10,10 @@ import (
 )

 type WrappedIndex struct {
-	JSONDataType telemetrytypes.JSONDataType `json:"-"`
-	ColumnType   string                      `json:"column_type"`
-	Type         string                      `json:"type"`
-	Granularity  int                         `json:"granularity"`
+	JSONDataType  telemetrytypes.JSONDataType  `json:"-"`
+	FieldDataType telemetrytypes.FieldDataType `json:"fieldDataType"`
+	Type          string                       `json:"type"`
+	Granularity   int                          `json:"granularity"`
 }

 type PromotePath struct {
@@ -60,12 +60,12 @@ func (i *PromotePath) ValidateAndSetDefaults() error {
 		return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "index granularity must be greater than 0")
 	}

-	jsonDataType, ok := telemetrytypes.MappingStringToJSONDataType[index.ColumnType]
+	jsonDataType, ok := telemetrytypes.MappingFieldDataTypeToJSONDataType[index.FieldDataType]
 	if !ok {
-		return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "invalid column type: %s", index.ColumnType)
+		return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "invalid column type: %s", index.FieldDataType)
 	}
 	if !jsonDataType.IndexSupported {
-		return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "index is not supported for column type: %s", index.ColumnType)
+		return errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "index is not supported for column type: %s", index.FieldDataType)
 	}

 	i.Indexes[idx].JSONDataType = jsonDataType
@@ -37,9 +37,9 @@ type TelemetryFieldKey struct {
 	FieldContext  FieldContext  `json:"fieldContext,omitzero"`
 	FieldDataType FieldDataType `json:"fieldDataType,omitzero"`

-	JSONPlan     JSONAccessPlan      `json:"-"`
-	Indexes      []JSONDataTypeIndex `json:"-"`
-	Materialized bool                `json:"-"` // refers to promoted in case of body.... fields
+	JSONPlan     JSONAccessPlan               `json:"-"`
+	Indexes      []TelemetryFieldKeySkipIndex `json:"-"`
+	Materialized bool                         `json:"-"` // refers to promoted in case of body.... fields

 	Evolutions []*EvolutionEntry `json:"-"`
 }
@@ -102,7 +102,7 @@ func (f TelemetryFieldKey) String() string {
 		if i > 0 {
 			sb.WriteString("; ")
 		}
-		fmt.Fprintf(&sb, "{type=%s, columnExpr=%s, indexExpr=%s}", index.Type.StringValue(), index.ColumnExpression, index.IndexExpression)
+		fmt.Fprintf(&sb, "{type=%s, indexExpr=%s}", MappingFieldDataTypeToJSONDataType[index.FieldDataType].StringValue(), index.IndexExpression)
 	}
 	sb.WriteString("]")
 }
@@ -400,3 +400,14 @@ func NewFieldValueSelectorFromPostableFieldValueParams(params PostableFieldValue

 	return fieldValueSelector
 }
+
+type TelemetryFieldKeySkipIndex struct {
+	Name            string        `json:"name"` // Name is TelemetryFieldKey.Name, not the IndexName from ClickHouse
+	FieldContext    FieldContext  `json:"fieldContext,omitzero"`
+	FieldDataType   FieldDataType `json:"fieldDataType,omitzero"`
+	BaseColumn      string        `json:"baseColumn"`
+	IndexName       string        `json:"indexName"`
+	IndexType       string        `json:"indexType"`
+	IndexExpression string        `json:"indexExpression"`
+	Granularity     int           `json:"granularity"`
+}
@@ -1,11 +1,5 @@
 package telemetrytypes

-type JSONDataTypeIndex struct {
-	Type             JSONDataType
-	ColumnExpression string
-	IndexExpression  string
-}
-
 type JSONDataType struct {
 	str     string // Store the correct case for ClickHouse
 	IsArray bool
@@ -32,18 +26,17 @@ var (
 	ArrayJSON = JSONDataType{"Array(JSON)", true, "JSON", false}
 )

-var MappingStringToJSONDataType = map[string]JSONDataType{
-	"String":                   String,
-	"Int64":                    Int64,
-	"Float64":                  Float64,
-	"Bool":                     Bool,
-	"Dynamic":                  Dynamic,
-	"Array(Nullable(String))":  ArrayString,
-	"Array(Nullable(Int64))":   ArrayInt64,
-	"Array(Nullable(Float64))": ArrayFloat64,
-	"Array(Nullable(Bool))":    ArrayBool,
-	"Array(Dynamic)":           ArrayDynamic,
-	"Array(JSON)":              ArrayJSON,
+var MappingJSONDataTypeToFieldDataType = map[string]FieldDataType{
+	"String":                   FieldDataTypeString,
+	"Int64":                    FieldDataTypeInt64,
+	"Float64":                  FieldDataTypeFloat64,
+	"Bool":                     FieldDataTypeBool,
+	"Array(Nullable(String))":  FieldDataTypeArrayString,
+	"Array(Nullable(Int64))":   FieldDataTypeArrayInt64,
+	"Array(Nullable(Float64))": FieldDataTypeArrayFloat64,
+	"Array(Nullable(Bool))":    FieldDataTypeArrayBool,
+	"Array(Dynamic)":           FieldDataTypeArrayDynamic,
+	"Array(JSON)":              FieldDataTypeArrayJSON,
 }

 var MappingFieldDataTypeToJSONDataType = map[FieldDataType]JSONDataType{
@@ -3,7 +3,6 @@ package telemetrytypes
 import (
 	"context"

-	schemamigrator "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
 	"github.com/SigNoz/signoz/pkg/types/metrictypes"
 )

@@ -35,7 +34,7 @@ type MetadataStore interface {
 	FetchTemporalityAndTypeMulti(ctx context.Context, queryTimeRangeStartTs, queryTimeRangeEndTs uint64, metricNames ...string) (map[string]metrictypes.Temporality, map[string]metrictypes.Type, error)

 	// ListLogsJSONIndexes lists the JSON indexes for the logs table.
-	ListLogsJSONIndexes(ctx context.Context, filters ...string) (map[string][]schemamigrator.Index, error)
+	ListLogsJSONIndexes(ctx context.Context, filters ...string) ([]TelemetryFieldKeySkipIndex, error)

 	// ListPromotedPaths lists the promoted paths.
 	GetPromotedPaths(ctx context.Context, paths ...string) (map[string]bool, error)
@@ -4,7 +4,6 @@ import (
 	"context"
 	"strings"

-	schemamigrator "github.com/SigNoz/signoz-otel-collector/cmd/signozschemamigrator/schema_migrator"
 	"github.com/SigNoz/signoz/pkg/types/metrictypes"
 	"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
 )
@@ -18,7 +17,7 @@ type MockMetadataStore struct {
 	TemporalityMap             map[string]metrictypes.Temporality
 	TypeMap                    map[string]metrictypes.Type
 	PromotedPathsMap           map[string]bool
-	LogsJSONIndexesMap         map[string][]schemamigrator.Index
+	LogsJSONIndexes            []telemetrytypes.TelemetryFieldKeySkipIndex
 	ColumnEvolutionMetadataMap map[string][]*telemetrytypes.EvolutionEntry
 	LookupKeysMap              map[telemetrytypes.MetricMetadataLookupKey]int64
 	// StaticFields holds signal-specific intrinsic field definitions (e.g. telemetrylogs.IntrinsicFields).
@@ -34,7 +33,7 @@ func NewMockMetadataStore() *MockMetadataStore {
 		TemporalityMap:             make(map[string]metrictypes.Temporality),
 		TypeMap:                    make(map[string]metrictypes.Type),
 		PromotedPathsMap:           make(map[string]bool),
-		LogsJSONIndexesMap:         make(map[string][]schemamigrator.Index),
+		LogsJSONIndexes:            []telemetrytypes.TelemetryFieldKeySkipIndex{},
 		ColumnEvolutionMetadataMap: make(map[string][]*telemetrytypes.EvolutionEntry),
 		LookupKeysMap:              make(map[telemetrytypes.MetricMetadataLookupKey]int64),
 		StaticFields:               make(map[string]telemetrytypes.TelemetryFieldKey),
@@ -369,8 +368,8 @@ func (m *MockMetadataStore) GetPromotedPaths(ctx context.Context, paths ...strin
 }

 // ListLogsJSONIndexes lists the JSON indexes for the logs table.
-func (m *MockMetadataStore) ListLogsJSONIndexes(ctx context.Context, filters ...string) (map[string][]schemamigrator.Index, error) {
-	return m.LogsJSONIndexesMap, nil
+func (m *MockMetadataStore) ListLogsJSONIndexes(ctx context.Context, filters ...string) ([]telemetrytypes.TelemetryFieldKeySkipIndex, error) {
+	return m.LogsJSONIndexes, nil
 }

 func (m *MockMetadataStore) updateColumnEvolutionMetadataForKeys(_ context.Context, keysToUpdate []*telemetrytypes.TelemetryFieldKey) map[string][]*telemetrytypes.EvolutionEntry {
@@ -23,6 +23,7 @@ pytest_plugins = [
     "fixtures.notification_channel",
     "fixtures.alerts",
     "fixtures.cloudintegrations",
+    "fixtures.jsontypeexporter",
 ]

@@ -78,6 +79,6 @@ def pytest_addoption(parser: pytest.Parser):
     parser.addoption(
         "--schema-migrator-version",
         action="store",
-        default="v0.144.2",
+        default="v0.144.3",
         help="schema migrator version",
     )
tests/integration/fixtures/jsontypeexporter.py (new file, 473 lines)

@@ -0,0 +1,473 @@
"""
Simpler version of jsontypeexporter for test fixtures.
This exports JSON type metadata to the path_types table by parsing JSON bodies
and extracting all paths with their types, similar to how the real jsontypeexporter works.
"""

import datetime
import json
from abc import ABC
from http import HTTPStatus
from typing import (
    Any,
    Callable,
    Dict,
    Generator,
    List,
    Optional,
    Set,
    Union,
)

import numpy as np
import pytest
import requests

from fixtures import types


class JSONPathType(ABC):
    """Represents a JSON path with its type information"""

    field_name: str
    field_data_type: str
    last_seen: np.uint64
    signal: str = "logs"
    field_context: str = "body"

    def __init__(
        self,
        field_name: str,
        field_data_type: str,
        last_seen: Optional[datetime.datetime] = None,
    ) -> None:
        self.field_name = field_name
        self.field_data_type = field_data_type
        self.signal = "logs"
        self.field_context = "body"
        if last_seen is None:
            last_seen = datetime.datetime.now()
        self.last_seen = np.uint64(int(last_seen.timestamp() * 1e9))

    def np_arr(self) -> np.array:
        """Return path type data as numpy array for database insertion"""
        return np.array([self.signal, self.field_context, self.field_name, self.field_data_type, self.last_seen])
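
Each JSONPathType maps one discovered path to one row of type metadata. As a quick illustration of the row layout np_arr() produces (a hypothetical call; the trailing element is last_seen in uint64 nanoseconds):

```python
pt = JSONPathType(field_name="user.name", field_data_type="string")
row = pt.np_arr()
# Row order matches the column_names used by export_json_types further down:
#   ["logs", "body", "user.name", "string", <last_seen ns>]
```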

# Constants matching jsontypeexporter
ARRAY_SEPARATOR = "[]."  # Used in paths like "education[].name"
ARRAY_SUFFIX = "[]"      # Used when traversing into array element objects


def _infer_array_type_from_type_strings(types: List[str]) -> Optional[str]:
    """
    Infer array type from a list of pre-classified type strings.
    Matches metadataexporter's inferArrayMask logic.

    Internal type strings are: "JSON", "String", "Bool", "Float64", "Int64"

    SuperTyping rules (matching Go inferArrayMask):
    - JSON alone → []json
    - JSON + any primitive → []dynamic
    - String alone → []string; String + other → []dynamic
    - Float64 wins over Int64 and Bool
    - Int64 wins over Bool
    - Bool alone → []bool
    """
    if len(types) == 0:
        return None

    unique = set(types)

    has_json = "JSON" in unique
    # hasPrimitive mirrors Go: (hasJSON && len(unique) > 1) || (!hasJSON && len(unique) > 0)
    has_primitive = (has_json and len(unique) > 1) or (not has_json and len(unique) > 0)

    if has_json:
        if not has_primitive:
            return "[]json"
        return "[]dynamic"

    # ---- Primitive Type Resolution (Float > Int > Bool) ----
    if "String" in unique:
        if len(unique) > 1:
            return "[]dynamic"
        return "[]string"

    if "Float64" in unique:
        return "[]float64"
    if "Int64" in unique:
        return "[]int64"
    if "Bool" in unique:
        return "[]bool"

    return "[]dynamic"

def _infer_array_type(elements: List[Any]) -> Optional[str]:
    """
    Infer array type from raw Python list elements.
    Classifies each element, then delegates to _infer_array_type_from_type_strings.
    """
    if len(elements) == 0:
        return None

    types = []
    for elem in elements:
        if elem is None:
            continue
        if isinstance(elem, dict):
            types.append("JSON")
        elif isinstance(elem, str):
            types.append("String")
        elif isinstance(elem, bool):  # must be before int (bool is a subclass of int)
            types.append("Bool")
        elif isinstance(elem, float):
            types.append("Float64")
        elif isinstance(elem, int):
            types.append("Int64")

    return _infer_array_type_from_type_strings(types)


def _python_type_to_clickhouse_type(value: Any) -> str:
    """
    Convert a Python type to a ClickHouse JSON data type string.
    Matches metadataexporter's mapPCommonValueTypeToDataType.
    """
    if isinstance(value, bool):
        return "bool"
    elif isinstance(value, int):
        return "int64"
    elif isinstance(value, float):
        return "float64"
    elif isinstance(value, str):
        return "string"
    elif isinstance(value, list):
        # Use the sophisticated array type inference
        array_type = _infer_array_type(value)
        return array_type if array_type else "[]dynamic"
    elif isinstance(value, dict):
        return "json"
    else:
        return "string"  # Default fallback


def _extract_json_paths(
    obj: Any,
    current_path: str = "",
    path_types: Optional[Dict[str, Set[str]]] = None,
    level: int = 0,
) -> Dict[str, Set[str]]:
    """
    Recursively extract all paths and their types from a JSON object.
    Matches jsontypeexporter's analyzePValue logic.

    Args:
        obj: The JSON object to traverse
        current_path: Current path being built (e.g., "user.name")
        path_types: Dictionary mapping paths to sets of types found
        level: Current nesting level (for depth limiting)

    Returns:
        Dictionary mapping paths to sets of type strings
    """
    if path_types is None:
        path_types = {}

    if obj is None:
        # Skip null values — matches Go walkNode, which errors on ValueTypeEmpty
        return path_types

    if isinstance(obj, dict):
        # For objects, recurse into keys without recording the object itself as a type.
        # Matches Go walkMap, which recurses without calling ta.record on the map node.
        for key, value in obj.items():
            # Build the path for this key
            if current_path:
                new_path = f"{current_path}.{key}"
            else:
                new_path = key

            # Recurse into the value
            _extract_json_paths(value, new_path, path_types, level + 1)

    elif isinstance(obj, list):
        # Skip empty arrays
        if len(obj) == 0:
            return path_types

        # Collect types from array elements (matching Go: types := make([]pcommon.ValueType, 0, s.Len()))
        types = []

        for item in obj:
            if isinstance(item, dict):
                # When traversing into array element objects, use ARRAY_SUFFIX ("[]").
                # This matches prefix+ArraySuffix in the Go code.
                # Example: if current_path is "education", we use "education[]" to traverse into objects
                array_prefix = current_path + ARRAY_SUFFIX if current_path else ""
                for key, value in item.items():
                    if array_prefix:
                        # Use the array separator: education[].name
                        array_path = f"{array_prefix}.{key}"
                    else:
                        array_path = key
                    # Recurse without increasing level (matching Go behavior)
                    _extract_json_paths(value, array_path, path_types, level)
                types.append("JSON")
            elif isinstance(item, list):
                # Arrays inside arrays are not supported - skip the whole path
                # Matching Go: e.logger.Error("arrays inside arrays are not supported!", ...); return nil
                return path_types
            elif isinstance(item, str):
                types.append("String")
            elif isinstance(item, bool):
                types.append("Bool")
            elif isinstance(item, float):
                types.append("Float64")
            elif isinstance(item, int):
                types.append("Int64")

        # Infer the array type from collected types (matching Go: if mask := inferArrayMask(types); mask != 0)
        if len(types) > 0:
            array_type = _infer_array_type_from_type_strings(types)
            if array_type and current_path:
                if current_path not in path_types:
                    path_types[current_path] = set()
                path_types[current_path].add(array_type)

    else:
        # Primitive value (string, number, bool)
        if current_path:
            if current_path not in path_types:
                path_types[current_path] = set()
            obj_type = _python_type_to_clickhouse_type(obj)
            path_types[current_path].add(obj_type)

    return path_types
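
To make the traversal concrete, here is what _extract_json_paths discovers for a small hypothetical body, traced by hand against the code above:

```python
paths = _extract_json_paths({
    "user": {"name": "alice", "scores": [1, 2.5]},
    "education": [{"school": "x"}],
})
assert paths == {
    "user.name": {"string"},
    "user.scores": {"[]float64"},      # Int64 + Float64 supertype to []float64
    "education[].school": {"string"},  # object array elements use the "[]" suffix
    "education": {"[]json"},           # the array itself is recorded as []json
}
```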

def _parse_json_bodies_and_extract_paths(
    json_bodies: List[str],
    timestamp: Optional[datetime.datetime] = None,
) -> List[JSONPathType]:
    """
    Parse JSON bodies and extract all paths with their types.
    This mimics the behavior of jsontypeexporter.

    Args:
        json_bodies: List of JSON body strings to parse
        timestamp: Timestamp to use for last_seen (defaults to now)

    Returns:
        List of JSONPathType objects with all discovered paths and types
    """
    if timestamp is None:
        timestamp = datetime.datetime.now()

    # Aggregate all paths and their types across all JSON bodies
    all_path_types: Dict[str, Set[str]] = {}

    for json_body in json_bodies:
        try:
            parsed = json.loads(json_body)
            _extract_json_paths(parsed, "", all_path_types, level=0)
        except (json.JSONDecodeError, TypeError):
            # Skip invalid JSON
            continue

    # Convert to a list of JSONPathType objects.
    # Each path can have multiple types, so we create one JSONPathType per type.
    path_type_objects: List[JSONPathType] = []
    for path, types_set in all_path_types.items():
        for type_str in types_set:
            path_type_objects.append(
                JSONPathType(field_name=path, field_data_type=type_str, last_seen=timestamp)
            )

    return path_type_objects
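
Because all_path_types aggregates type sets across bodies, a path observed with two different types yields one JSONPathType per type:

```python
pts = _parse_json_bodies_and_extract_paths(['{"status": 200}', '{"status": "ok"}'])
assert sorted((p.field_name, p.field_data_type) for p in pts) == [
    ("status", "int64"),
    ("status", "string"),
]
```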
@pytest.fixture(name="export_json_types", scope="function")
|
||||
def export_json_types(
|
||||
clickhouse: types.TestContainerClickhouse,
|
||||
request: pytest.FixtureRequest, # To access migrator fixture
|
||||
) -> Generator[
|
||||
Callable[[Union[List[JSONPathType], List[str], List[Any]]], None], Any, None
|
||||
]:
|
||||
"""
|
||||
Fixture for exporting JSON type metadata to the path_types table.
|
||||
This is a simpler version of jsontypeexporter for test fixtures.
|
||||
|
||||
The function can accept:
|
||||
1. List of JSONPathType objects (manual specification)
|
||||
2. List of JSON body strings (auto-extract paths)
|
||||
3. List of Logs objects (extract from body_json field)
|
||||
|
||||
Usage examples:
|
||||
# Manual specification
|
||||
export_json_types([
|
||||
JSONPathType(field_name="user.name", field_data_type="string"),
|
||||
JSONPathType(field_name="user.age", field_data_type="int64"),
|
||||
])
|
||||
|
||||
# Auto-extract from JSON strings
|
||||
export_json_types([
|
||||
'{"user": {"name": "alice", "age": 25}}',
|
||||
'{"user": {"name": "bob", "age": 30}}',
|
||||
])
|
||||
|
||||
# Auto-extract from Logs objects
|
||||
export_json_types(logs_list)
|
||||
"""
|
||||
# Ensure migrator has run to create the table
|
||||
try:
|
||||
request.getfixturevalue("migrator")
|
||||
except Exception:
|
||||
# If migrator fixture is not available, that's okay - table might already exist
|
||||
pass
|
||||
|
||||
def _export_json_types(
|
||||
data: Union[
|
||||
List[JSONPathType], List[str], List[Any]
|
||||
], # List[Logs] but avoiding circular import
|
||||
) -> None:
|
||||
"""
|
||||
Export JSON type metadata to signoz_metadata.distributed_field_keys table.
|
||||
This table stores signal, context, path, and type information for body JSON fields.
|
||||
"""
|
||||
path_types: List[JSONPathType] = []
|
||||
|
||||
if len(data) == 0:
|
||||
return
|
||||
|
||||
# Determine input type and convert to JSONPathType list
|
||||
first_item = data[0]
|
||||
|
||||
if isinstance(first_item, JSONPathType):
|
||||
# Already JSONPathType objects
|
||||
path_types = data # type: ignore
|
||||
elif isinstance(first_item, str):
|
||||
# List of JSON strings - parse and extract paths
|
||||
path_types = _parse_json_bodies_and_extract_paths(data) # type: ignore
|
||||
else:
|
||||
# Assume it's a list of Logs objects - extract body_v2
|
||||
json_bodies: List[str] = []
|
||||
for log in data: # type: ignore
|
||||
# Try to get body_v2 attribute
|
||||
if hasattr(log, "body_v2") and log.body_v2:
|
||||
json_bodies.append(log.body_v2)
|
||||
elif hasattr(log, "body") and log.body:
|
||||
# Fallback to body if body_v2 not available
|
||||
try:
|
||||
# Try to parse as JSON
|
||||
json.loads(log.body)
|
||||
json_bodies.append(log.body)
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
pass
|
||||
|
||||
if json_bodies:
|
||||
path_types = _parse_json_bodies_and_extract_paths(json_bodies)
|
||||
|
||||
if len(path_types) == 0:
|
||||
return
|
||||
|
||||
clickhouse.conn.insert(
|
||||
database="signoz_metadata",
|
||||
table="distributed_field_keys",
|
||||
data=[path_type.np_arr() for path_type in path_types],
|
||||
column_names=[
|
||||
"signal",
|
||||
"field_context",
|
||||
"field_name",
|
||||
"field_data_type",
|
||||
"last_seen",
|
||||
],
|
||||
)
|
||||
|
||||
yield _export_json_types
|
||||
|
||||
# Cleanup - truncate the local table after tests (following pattern from logs fixture)
|
||||
clickhouse.conn.query(
|
||||
f"TRUNCATE TABLE signoz_metadata.field_keys ON CLUSTER '{clickhouse.env['SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER']}' SYNC"
|
||||
)
|
||||
|
||||
@pytest.fixture(name="create_json_index", scope="function")
|
||||
def create_json_index(
|
||||
signoz: types.SigNoz,
|
||||
) -> Generator[Callable[[str, List[Dict[str, Any]]], None], None, None]:
|
||||
"""
|
||||
Create ClickHouse data-skipping indexes on body_v2 JSON sub-columns via
|
||||
POST /api/v1/logs/promote_paths.
|
||||
|
||||
**Must be called BEFORE insert_logs** so that newly inserted data parts are
|
||||
covered by the index and the QB uses the indexed condition path.
|
||||
|
||||
Each entry in `paths` follows the PromotePath API shape:
|
||||
{
|
||||
"path": "body.user.name", # must start with "body."
|
||||
"indexes": [
|
||||
{
|
||||
"fieldDataType": "string", # string | int64 | float64
|
||||
"type": "ngrambf_v1(3, 256, 2, 0)", # or "minmax", "tokenbf_v1(...)"
|
||||
"granularity": 1,
|
||||
}
|
||||
],
|
||||
}
|
||||
|
||||
Teardown drops every index created during the test by querying
|
||||
system.data_skipping_indices for matching expressions.
|
||||
|
||||
Example::
|
||||
|
||||
def test_foo(signoz, get_token, insert_logs, export_json_types, create_json_body_index):
|
||||
token = get_token(...)
|
||||
export_json_types(logs_list)
|
||||
create_json_body_index(token, [
|
||||
{"path": "body.user.name",
|
||||
"indexes": [{"fieldDataType": "string", "type": "ngrambf_v1(3, 256, 2, 0)", "granularity": 1}]},
|
||||
{"path": "body.user.age",
|
||||
"indexes": [{"fieldDataType": "int64", "type": "minmax", "granularity": 1}]},
|
||||
])
|
||||
insert_logs(logs_list) # data inserted after index exists — index is built automatically
|
||||
"""
|
||||
created_paths: List[str] = []
|
||||
|
||||
def _create_json_body_index(token: str, paths: List[Dict[str, Any]]) -> None:
|
||||
response = requests.post(
|
||||
signoz.self.host_configs["8080"].get("/api/v1/logs/promote_paths"),
|
||||
headers={"authorization": f"Bearer {token}"},
|
||||
json=paths,
|
||||
timeout=30,
|
||||
)
|
||||
assert response.status_code == HTTPStatus.CREATED, (
|
||||
f"Failed to create JSON body indexes: "
|
||||
f"{response.status_code} {response.text}"
|
||||
)
|
||||
for path in paths:
|
||||
# The API strips the "body." prefix before storing — mirror that here
|
||||
# so our cleanup query uses the bare path (e.g. "user.name").
|
||||
raw = path["path"].removeprefix("body.")
|
||||
if raw not in created_paths:
|
||||
created_paths.append(raw)
|
||||
|
||||
yield _create_json_body_index
|
||||
|
||||
if not created_paths:
|
||||
return
|
||||
|
||||
cluster = signoz.telemetrystore.env["SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_CLUSTER"]
|
||||
for path in created_paths:
|
||||
result = signoz.telemetrystore.conn.query(
|
||||
"SELECT name FROM system.data_skipping_indices "
|
||||
"WHERE database = 'signoz_logs' AND table = 'logs_v2' "
|
||||
f"AND expr LIKE '%{path}%'"
|
||||
)
|
||||
for (index_name,) in result.result_rows:
|
||||
signoz.telemetrystore.conn.query(
|
||||
f"ALTER TABLE signoz_logs.logs_v2 "
|
||||
f"ON CLUSTER '{cluster}' "
|
||||
f"DROP INDEX IF EXISTS `{index_name}`"
|
||||
)
|
||||
@@ -122,6 +122,8 @@ class Logs(ABC):
         resources: dict[str, Any] = {},
         attributes: dict[str, Any] = {},
         body: str = "default body",
+        body_v2: Optional[str] = None,
+        body_promoted: Optional[str] = None,
         severity_text: str = "INFO",
         trace_id: str = "",
         span_id: str = "",
@@ -167,6 +169,33 @@ class Logs(ABC):
         # Set body
         self.body = body

+        # Set body_v2 - when not passed explicitly, derive it from body
+        # (ClickHouse accepts String input for a JSON column)
+        if body_v2 is not None:
+            self.body_v2 = body_v2
+        else:
+            # Try to parse body as JSON; if successful, use it directly,
+            # otherwise wrap it as {"message": body}, matching the normalize operator behavior.
+            try:
+                json.loads(body)
+                self.body_v2 = body
+            except (json.JSONDecodeError, TypeError):
+                self.body_v2 = json.dumps({"message": body})
+
+        # Set body_promoted - must be valid JSON
+        # Tests will explicitly pass the promoted column's content, but we validate it
+        if body_promoted is not None:
+            # Validate that it's valid JSON
+            try:
+                json.loads(body_promoted)
+                self.body_promoted = body_promoted
+            except (json.JSONDecodeError, TypeError):
+                # If invalid, default to an empty JSON object
+                self.body_promoted = "{}"
+        else:
+            # Default to an empty JSON object (valid JSON)
+            self.body_promoted = "{}"
+
         # Process resources and attributes
         self.resources_string = {k: str(v) for k, v in resources.items()}
         self.resource_json = (
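
The body to body_v2 fallback above is easy to sanity-check in isolation; a minimal standalone sketch of the same normalization (not the fixture's actual helper):

```python
import json

def normalize_body(body: str) -> str:
    # Keep valid JSON as-is; otherwise wrap the raw string as
    # {"message": body}, mirroring the constructor logic above.
    try:
        json.loads(body)
        return body
    except (json.JSONDecodeError, TypeError):
        return json.dumps({"message": body})

assert normalize_body('{"user": {"name": "alice"}}') == '{"user": {"name": "alice"}}'
assert normalize_body("plain text line") == '{"message": "plain text line"}'
```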

@@ -326,6 +355,8 @@ class Logs(ABC):
             self.severity_text,
             self.severity_number,
             self.body,
+            self.body_v2,
+            self.body_promoted,
             self.attributes_string,
             self.attributes_number,
             self.attributes_bool,
@@ -454,31 +485,53 @@ def insert_logs(
             data=[resource_key.np_arr() for resource_key in resource_keys],
         )

+        # All columns in insertion order (must match Logs.np_arr() order)
+        all_column_names = [
+            "ts_bucket_start",
+            "resource_fingerprint",
+            "timestamp",
+            "observed_timestamp",
+            "id",
+            "trace_id",
+            "span_id",
+            "trace_flags",
+            "severity_text",
+            "severity_number",
+            "body",
+            "body_v2",
+            "body_promoted",
+            "attributes_string",
+            "attributes_number",
+            "attributes_bool",
+            "resources_string",
+            "scope_name",
+            "scope_version",
+            "scope_string",
+            "resource",
+        ]
+
+        # Check if the body_v2 column exists (only present when the ENABLE_LOGS_MIGRATIONS_V2 migration has run)
+        result = clickhouse.conn.query(
+            "SELECT count() FROM system.columns WHERE database = 'signoz_logs' AND table = 'logs_v2' AND name = 'body_v2'"
+        )
+        has_json_body = result.result_rows[0][0] > 0
+
+        if has_json_body:
+            column_names = all_column_names
+            data = [log.np_arr() for log in logs]
+        else:
+            json_body_cols = {"body_v2", "body_promoted"}
+            keep_indices = [
+                i for i, c in enumerate(all_column_names) if c not in json_body_cols
+            ]
+            column_names = [all_column_names[i] for i in keep_indices]
+            data = [log.np_arr()[keep_indices] for log in logs]
+
         clickhouse.conn.insert(
             database="signoz_logs",
             table="distributed_logs_v2",
-            data=[log.np_arr() for log in logs],
-            column_names=[
-                "ts_bucket_start",
-                "resource_fingerprint",
-                "timestamp",
-                "observed_timestamp",
-                "id",
-                "trace_id",
-                "span_id",
-                "trace_flags",
-                "severity_text",
-                "severity_number",
-                "body",
-                "attributes_string",
-                "attributes_number",
-                "attributes_bool",
-                "resources_string",
-                "scope_name",
-                "scope_version",
-                "scope_string",
-                "resource",
-            ],
+            data=data,
+            column_names=column_names,
         )

     yield _insert_logs
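
The column filtering above is positional: np_arr() rows and all_column_names share one order, so dropping the JSON body columns means dropping the same indices from both sides. A toy illustration with hypothetical values:

```python
import numpy as np

all_cols = ["body", "body_v2", "body_promoted", "attributes_string"]
row = np.array(["hello", "{}", "{}", "{}"])

drop = {"body_v2", "body_promoted"}
keep = [i for i, c in enumerate(all_cols) if c not in drop]

assert [all_cols[i] for i in keep] == ["body", "attributes_string"]
assert list(row[keep]) == ["hello", "{}"]
```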
@@ -1,3 +1,5 @@
+from typing import Optional
+
 import docker
 import pytest
 from testcontainers.core.container import Network
@@ -8,27 +10,32 @@ from fixtures.logger import setup_logger
 logger = setup_logger(__name__)


-@pytest.fixture(name="migrator", scope="package")
-def migrator(
+def create_migrator(
     network: Network,
     clickhouse: types.TestContainerClickhouse,
     request: pytest.FixtureRequest,
     pytestconfig: pytest.Config,
+    cache_key: str = "migrator",
+    env_overrides: Optional[dict] = None,
 ) -> types.Operation:
     """
-    Package-scoped fixture for running schema migrations.
+    Factory function for running schema migrations.
+    Accepts optional env_overrides to customize the migrator environment.
     """

     def create() -> None:
         version = request.config.getoption("--schema-migrator-version")
         client = docker.from_env()

+        environment = dict(env_overrides) if env_overrides else {}
+
         container = client.containers.run(
             image=f"signoz/signoz-schema-migrator:{version}",
             command=f"sync --replication=true --cluster-name=cluster --up= --dsn={clickhouse.env["SIGNOZ_TELEMETRYSTORE_CLICKHOUSE_DSN"]}",
             detach=True,
             auto_remove=False,
             network=network.id,
+            environment=environment,
         )

         result = container.wait()
@@ -47,6 +54,7 @@ def migrator(
             detach=True,
             auto_remove=False,
             network=network.id,
+            environment=environment,
         )

         result = container.wait()
@@ -59,7 +67,7 @@ def migrator(

         container.remove()

-        return types.Operation(name="migrator")
+        return types.Operation(name=cache_key)

     def delete(_: types.Operation) -> None:
         pass
@@ -70,9 +78,27 @@ def migrator(
     return dev.wrap(
         request,
         pytestconfig,
-        "migrator",
+        cache_key,
         lambda: types.Operation(name=""),
         create,
         delete,
         restore,
     )
+
+
+@pytest.fixture(name="migrator", scope="package")
+def migrator(
+    network: Network,
+    clickhouse: types.TestContainerClickhouse,
+    request: pytest.FixtureRequest,
+    pytestconfig: pytest.Config,
+) -> types.Operation:
+    """
+    Package-scoped fixture for running schema migrations.
+    """
+    return create_migrator(
+        network=network,
+        clickhouse=clickhouse,
+        request=request,
+        pytestconfig=pytestconfig,
+    )
tests/integration/src/querier_json_body/01_logs_json_body_new_qb.py (new file, 1716 lines)
File diff suppressed because it is too large.

tests/integration/src/querier_json_body/__init__.py (new file, empty)

tests/integration/src/querier_json_body/conftest.py (new file, 70 lines)
@@ -0,0 +1,70 @@
import pytest
from testcontainers.core.container import Network

from fixtures import types
from fixtures.migrator import create_migrator
from fixtures.signoz import create_signoz

UNSUPPORTED_CLICKHOUSE_VERSIONS = {"25.5.6"}


def pytest_collection_modifyitems(
    config: pytest.Config, items: list[pytest.Item]
) -> None:
    version = config.getoption("--clickhouse-version")
    if version in UNSUPPORTED_CLICKHOUSE_VERSIONS:
        skip = pytest.mark.skip(
            reason=f"JSON body QB tests require ClickHouse > {version}"
        )
        for item in items:
            item.add_marker(skip)


@pytest.fixture(name="migrator", scope="package")
def migrator_json(
    network: Network,
    clickhouse: types.TestContainerClickhouse,
    request: pytest.FixtureRequest,
    pytestconfig: pytest.Config,
) -> types.Operation:
    """
    Package-scoped migrator with ENABLE_LOGS_MIGRATIONS_V2=1.
    """
    return create_migrator(
        network=network,
        clickhouse=clickhouse,
        request=request,
        pytestconfig=pytestconfig,
        cache_key="migrator-json-body",
        env_overrides={
            "ENABLE_LOGS_MIGRATIONS_V2": "1",
        },
    )


@pytest.fixture(name="signoz", scope="package")
def signoz_json_body(
    network: Network,
    zeus: types.TestContainerDocker,
    gateway: types.TestContainerDocker,
    sqlstore: types.TestContainerSQL,
    clickhouse: types.TestContainerClickhouse,
    request: pytest.FixtureRequest,
    pytestconfig: pytest.Config,
) -> types.SigNoz:
    """
    Package-scoped fixture for SigNoz with BODY_JSON_QUERY_ENABLED=true.
    """
    return create_signoz(
        network=network,
        zeus=zeus,
        gateway=gateway,
        sqlstore=sqlstore,
        clickhouse=clickhouse,
        request=request,
        pytestconfig=pytestconfig,
        cache_key="signoz-json-body",
        env_overrides={
            "BODY_JSON_QUERY_ENABLED": "true",
        },
    )