Compare commits

..

2 Commits

Author SHA1 Message Date
aks07
df9945ccfc feat: algo change 2026-06-02 22:36:52 +05:30
Nityananda Gohain
a71ac2ada6 fix: add adjustkeys in trace operator cte builder (#11349)
* fix: add adjustkeys in trace operator cte builder

* fix: more fixes

* fix: cleanup

* fix: move tests to trace operator file

* fix: address comments

* fix: lint issues

---------

Co-authored-by: Srikanth Chekuri <srikanth.chekuri92@gmail.com>
2026-06-02 10:59:15 +00:00
23 changed files with 708 additions and 905 deletions

View File

@@ -440,17 +440,6 @@ tracedetail:
max_depth_to_auto_expand: 5
# Threshold below which all spans are returned without windowing.
max_limit_to_select_all_spans: 10000
flamegraph:
# Maximum number of BFS depth levels included in a windowed response.
max_selected_levels: 50
# Maximum spans per level before sampling is applied.
max_spans_per_level: 100
# Number of highest-latency spans always included when sampling a level.
sampling_top_latency_count: 5
# Number of timestamp buckets used for uniform sampling within a level.
sampling_bucket_count: 50
# Threshold below which all spans are returned without windowing or sampling.
select_all_spans_limit: 100000
##################### Authz #################################
authz:

View File

@@ -6516,61 +6516,6 @@ components:
- attribute
- resource
type: string
SpantypesFlamegraphSpan:
properties:
attributes:
additionalProperties: {}
type: object
durationNano:
minimum: 0
type: integer
event:
items:
$ref: '#/components/schemas/SpantypesEvent'
type: array
hasError:
type: boolean
level:
format: int64
type: integer
name:
type: string
parentSpanId:
type: string
resource:
additionalProperties:
type: string
type: object
spanId:
type: string
timestamp:
minimum: 0
type: integer
required:
- event
type: object
SpantypesGettableFlamegraphTrace:
properties:
endTimestampMillis:
format: int64
type: integer
hasMore:
type: boolean
spans:
items:
items:
$ref: '#/components/schemas/SpantypesFlamegraphSpan'
type: array
type: array
startTimestampMillis:
format: int64
type: integer
required:
- spans
- startTimestampMillis
- endTimestampMillis
- hasMore
type: object
SpantypesGettableSpanMapperGroups:
properties:
items:
@@ -6636,15 +6581,6 @@ components:
traceId:
type: string
type: object
SpantypesPostableFlamegraph:
properties:
selectFields:
items:
$ref: '#/components/schemas/TelemetrytypesTelemetryFieldKey'
type: array
selectedSpanId:
type: string
type: object
SpantypesPostableSpanMapper:
properties:
config:
@@ -20091,75 +20027,6 @@ paths:
summary: Put profile in Zeus for a deployment.
tags:
- zeus
/api/v3/traces/{traceID}/flamegraph:
post:
deprecated: false
description: Returns the flamegraph view of spans for a given trace ID.
operationId: GetFlamegraph
parameters:
- in: path
name: traceID
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SpantypesPostableFlamegraph'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/SpantypesGettableFlamegraphTrace'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Not Found
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: Get flamegraph view for a trace
tags:
- tracedetail
/api/v3/traces/{traceID}/waterfall:
post:
deprecated: false

View File

@@ -7675,77 +7675,6 @@ export enum SpantypesFieldContextDTO {
attribute = 'attribute',
resource = 'resource',
}
export type SpantypesFlamegraphSpanDTOAttributes = { [key: string]: unknown };
export type SpantypesFlamegraphSpanDTOResource = { [key: string]: string };
export interface SpantypesFlamegraphSpanDTO {
/**
* @type object
*/
attributes?: SpantypesFlamegraphSpanDTOAttributes;
/**
* @type integer
* @minimum 0
*/
durationNano?: number;
/**
* @type array
*/
event: SpantypesEventDTO[];
/**
* @type boolean
*/
hasError?: boolean;
/**
* @type integer
* @format int64
*/
level?: number;
/**
* @type string
*/
name?: string;
/**
* @type string
*/
parentSpanId?: string;
/**
* @type object
*/
resource?: SpantypesFlamegraphSpanDTOResource;
/**
* @type string
*/
spanId?: string;
/**
* @type integer
* @minimum 0
*/
timestamp?: number;
}
export interface SpantypesGettableFlamegraphTraceDTO {
/**
* @type integer
* @format int64
*/
endTimestampMillis: number;
/**
* @type boolean
*/
hasMore: boolean;
/**
* @type array
*/
spans: SpantypesFlamegraphSpanDTO[][];
/**
* @type integer
* @format int64
*/
startTimestampMillis: number;
}
export type SpantypesSpanMapperGroupConditionDTOAnyOf = {
/**
* @type array,null
@@ -8047,17 +7976,6 @@ export interface SpantypesGettableWaterfallTraceDTO {
uncollapsedSpans?: string[] | null;
}
export interface SpantypesPostableFlamegraphDTO {
/**
* @type array
*/
selectFields?: TelemetrytypesTelemetryFieldKeyDTO[];
/**
* @type string
*/
selectedSpanId?: string;
}
export enum SpantypesSpanMapperOperationDTO {
move = 'move',
copy = 'copy',
@@ -10359,17 +10277,6 @@ export type GetHosts200 = {
status: string;
};
export type GetFlamegraphPathParameters = {
traceID: string;
};
export type GetFlamegraph200 = {
data: SpantypesGettableFlamegraphTraceDTO;
/**
* @type string
*/
status: string;
};
export type GetWaterfallPathParameters = {
traceID: string;
};

View File

@@ -12,8 +12,6 @@ import type {
} from 'react-query';
import type {
GetFlamegraph200,
GetFlamegraphPathParameters,
GetTraceAggregations200,
GetTraceAggregationsPathParameters,
GetWaterfall200,
@@ -21,7 +19,6 @@ import type {
GetWaterfallV4200,
GetWaterfallV4PathParameters,
RenderErrorResponseDTO,
SpantypesPostableFlamegraphDTO,
SpantypesPostableTraceAggregationsDTO,
SpantypesPostableWaterfallDTO,
} from '../sigNoz.schemas';
@@ -129,105 +126,6 @@ export const useGetTraceAggregations = <
> => {
return useMutation(getGetTraceAggregationsMutationOptions(options));
};
/**
* Returns the flamegraph view of spans for a given trace ID.
* @summary Get flamegraph view for a trace
*/
export const getFlamegraph = (
{ traceID }: GetFlamegraphPathParameters,
spantypesPostableFlamegraphDTO?: BodyType<SpantypesPostableFlamegraphDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<GetFlamegraph200>({
url: `/api/v3/traces/${traceID}/flamegraph`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: spantypesPostableFlamegraphDTO,
signal,
});
};
export const getGetFlamegraphMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
> => {
const mutationKey = ['getFlamegraph'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof getFlamegraph>>,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
}
> = (props) => {
const { pathParams, data } = props ?? {};
return getFlamegraph(pathParams, data);
};
return { mutationFn, ...mutationOptions };
};
export type GetFlamegraphMutationResult = NonNullable<
Awaited<ReturnType<typeof getFlamegraph>>
>;
export type GetFlamegraphMutationBody =
| BodyType<SpantypesPostableFlamegraphDTO>
| undefined;
export type GetFlamegraphMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary Get flamegraph view for a trace
*/
export const useGetFlamegraph = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
> => {
return useMutation(getGetFlamegraphMutationOptions(options));
};
/**
* Returns the waterfall view of spans for a given trace ID with tree structure, metadata, and windowed pagination
* @summary Get waterfall view for a trace

View File

@@ -472,4 +472,98 @@ describe('computeVisualLayout', () => {
expect(aRow).toBeGreaterThan(1); // must NOT be at row 1
expect(aRow).toBe(3); // next free row after B at row 2 (A overlaps B)
});
// --- Wide-group fast path (> WIDE_GROUP_THRESHOLD siblings) ---
// Past the threshold the layout switches to exact overlap-only packing to
// avoid the O(N^2) connector-avoidance spiral. These lock in correctness and
// the no-overlap invariant at scale.
function noRowHasOverlap(
layout: ReturnType<typeof computeVisualLayout>,
): void {
for (const row of layout.visualRows) {
const sorted = [...row].sort((a, b) => a.timestamp - b.timestamp);
for (let i = 1; i < sorted.length; i++) {
const prevEnd = sorted[i - 1].timestamp + sorted[i - 1].durationNano / 1e6;
expect(sorted[i].timestamp).toBeGreaterThanOrEqual(prevEnd);
}
}
}
it('should pack thousands of sequential leaf siblings into 1 row (wide path)', () => {
const root = makeSpan({ spanId: 'root', timestamp: 0, durationNano: 1e12 });
const kids: FlamegraphSpan[] = [];
// 2000 strictly sequential (non-overlapping) children
for (let i = 0; i < 2000; i++) {
kids.push(
makeSpan({
spanId: `k${i}`,
parentSpanId: 'root',
timestamp: i * 10,
durationNano: 5e6, // 5ms, ends before next starts
}),
);
}
const layout = computeVisualLayout([[root], kids]);
expect(layout.spanToVisualRow['root']).toBe(0);
expect(layout.totalVisualRows).toBe(2); // all siblings share row 1
for (const k of kids) {
expect(layout.spanToVisualRow[k.spanId]).toBe(1);
}
noRowHasOverlap(layout);
});
it('should pack thousands of fully-overlapping leaf siblings without violations (wide path)', () => {
const root = makeSpan({ spanId: 'root', timestamp: 0, durationNano: 1e12 });
const kids: FlamegraphSpan[] = [];
// 1000 children all spanning the same window → each needs its own row
for (let i = 0; i < 1000; i++) {
kids.push(
makeSpan({
spanId: `k${i}`,
parentSpanId: 'root',
timestamp: 0,
durationNano: 100e6,
}),
);
}
const layout = computeVisualLayout([[root], kids]);
expect(layout.totalVisualRows).toBe(1001); // root + 1000 stacked rows
expect(Object.keys(layout.spanToVisualRow)).toHaveLength(1001);
noRowHasOverlap(layout);
});
it('should keep non-leaf subtrees adjacent within a wide mixed group (wide path)', () => {
const root = makeSpan({ spanId: 'root', timestamp: 0, durationNano: 1e12 });
const kids: FlamegraphSpan[] = [];
for (let i = 0; i < 1000; i++) {
kids.push(
makeSpan({
spanId: `k${i}`,
parentSpanId: 'root',
timestamp: i * 10,
durationNano: 5e6,
}),
);
}
// One of the wide siblings has a child of its own
const grandchild = makeSpan({
spanId: 'gc',
parentSpanId: 'k500',
timestamp: 5000,
durationNano: 2e6,
});
const layout = computeVisualLayout([[root], kids, [grandchild]]);
const parentRow = layout.spanToVisualRow['k500'];
const gcRow = layout.spanToVisualRow['gc'];
expect(gcRow - parentRow).toBe(1); // subtree adjacency preserved
expect(Object.keys(layout.spanToVisualRow)).toHaveLength(1002);
noRowHasOverlap(layout);
});
});

View File

@@ -18,6 +18,81 @@ export interface VisualLayout {
totalVisualRows: number;
}
// Above this many siblings under one parent, the connector-avoidance refinement
// (Checks 2 & 3) is both visually meaningless — the row is already a dense wall —
// and quadratic: every child deposits a connector point on each intermediate row,
// which pushes later children even higher, which deposits more points. That
// feedback loop inflates a layout needing ~50 rows to thousands and never
// finishes on wide traces. Past the threshold we pack by overlap only.
const WIDE_GROUP_THRESHOLD = 512;
/**
* Segment tree over rows that answers "lowest row index >= `from` whose smallest
* span start-time is >= `end`" in O(log rows). Used to place a large group of
* leaf siblings by overlap only: because siblings are processed in descending
* start order, every already-placed span on a row starts at or after the current
* one, so [start, end] overlaps a row iff some span there starts before `end` —
* i.e. the row is free iff its minimum start >= end. Each node stores the max of
* its subtree's per-row minimum starts so a free row can be found by descent.
*/
class LowestFreeRow {
private readonly size: number;
private readonly tree: Float64Array;
constructor(rows: number) {
let size = 1;
while (size < rows) {
size *= 2;
}
this.size = size;
this.tree = new Float64Array(size * 2).fill(Infinity);
}
place(row: number, start: number): void {
let i = row + this.size;
// A row's key is the minimum start among its spans. Children are processed
// in descending start order so a leaf's start is the new minimum, but a
// non-leaf subtree's descendant can land on a row out of order — take min.
if (start >= this.tree[i]) {
return;
}
this.tree[i] = start;
for (i >>= 1; i >= 1; i >>= 1) {
const next = Math.max(this.tree[2 * i], this.tree[2 * i + 1]);
if (this.tree[i] === next) {
break;
}
this.tree[i] = next;
}
}
lowestFrom(from: number, end: number): number {
return this.descend(1, 0, this.size - 1, from, end);
}
private descend(
node: number,
lo: number,
hi: number,
from: number,
end: number,
): number {
if (hi < from || this.tree[node] < end) {
return -1;
}
if (lo === hi) {
return lo;
}
const mid = (lo + hi) >> 1;
const left = this.descend(2 * node, lo, mid, from, end);
if (left !== -1) {
return left;
}
return this.descend(2 * node + 1, mid + 1, hi, from, end);
}
}
/**
* Computes an overlap-safe visual layout for flamegraph spans using DFS ordering.
*
@@ -214,7 +289,53 @@ export function computeVisualLayout(spans: FlamegraphSpan[][]): VisualLayout {
arr.push(point);
}
// Fast path for a parent with a very large group of children: pack by overlap
// only (descending greedy), skipping the quadratic connector-avoidance that
// spirals at this scale. Leaf children — the bulk of a wide trace — are placed
// in O(log rows) via the segment tree; the rare non-leaf subtree falls back to
// findPlacement against the shared interval map. Both structures are kept in
// sync so each placement sees all prior occupancy. Same ShapeEntry[] contract.
function computeWideShape(
rootSpan: FlamegraphSpan,
children: FlamegraphSpan[],
): ShapeEntry[] {
const shape: ShapeEntry[] = [{ span: rootSpan, relativeRow: 0 }];
const localIntervals = new Map<number, Array<[number, number]>>();
// Children occupy relative rows 1..children.length in the worst case.
const finder = new LowestFreeRow(children.length + 2);
const occupy = (row: number, span: FlamegraphSpan): void => {
const s = span.timestamp;
const e = span.timestamp + span.durationNano / 1e6;
shape.push({ span, relativeRow: row });
addIntervalTo(localIntervals, row, s, e);
finder.place(row, s);
};
for (const child of children) {
if (childrenMap.has(child.spanId)) {
// Non-leaf: place its whole subtree shape as a unit via findPlacement.
const childShape = computeSubtreeShape(child);
const offset = findPlacement(childShape, 1, localIntervals);
for (const entry of childShape) {
occupy(entry.relativeRow + offset, entry.span);
}
} else {
const end = child.timestamp + child.durationNano / 1e6;
occupy(finder.lowestFrom(1, end), child);
}
}
return shape;
}
function computeSubtreeShape(rootSpan: FlamegraphSpan): ShapeEntry[] {
const children = childrenMap.get(rootSpan.spanId);
if (children && children.length > WIDE_GROUP_THRESHOLD) {
return computeWideShape(rootSpan, children);
}
const localIntervals = new Map<number, Array<[number, number]>>();
const localConnectorPoints = new Map<number, number[]>();
const shape: ShapeEntry[] = [];
@@ -225,7 +346,6 @@ export function computeVisualLayout(spans: FlamegraphSpan[][]): VisualLayout {
shape.push({ span: rootSpan, relativeRow: 0 });
addIntervalTo(localIntervals, 0, rootStart, rootEnd);
const children = childrenMap.get(rootSpan.spanId);
if (children) {
for (const child of children) {
const childShape = computeSubtreeShape(child);

View File

@@ -94,7 +94,7 @@ export function useVisualLayoutWorker(spans: FlamegraphSpan[][]): {
cleanup();
};
// Timeout: if worker doesn't respond in 30s, terminate and error
// Timeout: if worker doesn't respond in 15s, terminate and error
const WORKER_TIMEOUT_MS = 15000;
const timeoutId = setTimeout(() => {
if (requestIdRef.current === currentId && isComputingRef.current) {

View File

@@ -67,24 +67,5 @@ func (provider *provider) addTraceDetailRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v3/traces/{traceID}/flamegraph", handler.New(
provider.authzMiddleware.ViewAccess(provider.traceDetailHandler.GetFlamegraph),
handler.OpenAPIDef{
ID: "GetFlamegraph",
Tags: []string{"tracedetail"},
Summary: "Get flamegraph view for a trace",
Description: "Returns the flamegraph view of spans for a given trace ID.",
Request: new(spantypes.PostableFlamegraph),
RequestContentType: "application/json",
Response: new(spantypes.GettableFlamegraphTrace),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound},
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
},
)).Methods(http.MethodPost).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -6,16 +6,7 @@ import (
)
type Config struct {
Waterfall WaterfallConfig `mapstructure:"waterfall"`
Flamegraph FlamegraphConfig `mapstructure:"flamegraph"`
}
type FlamegraphConfig struct {
MaxSelectedLevels int `mapstructure:"max_selected_levels"`
MaxSpansPerLevel int `mapstructure:"max_spans_per_level"`
SamplingTopLatencySpansCount int `mapstructure:"sampling_top_latency_count"`
SamplingBucketCount int `mapstructure:"sampling_bucket_count"`
SelectAllSpansLimit uint `mapstructure:"select_all_spans_limit"`
Waterfall WaterfallConfig `mapstructure:"waterfall"`
}
type WaterfallConfig struct {
@@ -38,13 +29,6 @@ func newConfig() factory.Config {
MaxDepthToAutoExpand: 5,
MaxLimitToSelectAllSpans: 10_000,
},
Flamegraph: FlamegraphConfig{
MaxSelectedLevels: 50,
MaxSpansPerLevel: 100,
SamplingTopLatencySpansCount: 5,
SamplingBucketCount: 50,
SelectAllSpansLimit: 100_000,
},
}
}
@@ -61,25 +45,5 @@ func (c Config) Validate() error {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.waterfall.max_limit_to_select_all_spans must be positive")
}
if c.Flamegraph.MaxSelectedLevels <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.flamegraph.level_limit must be positive, got %d", c.Flamegraph.MaxSelectedLevels)
}
if c.Flamegraph.MaxSpansPerLevel <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.flamegraph.spans_per_level must be positive, got %d", c.Flamegraph.MaxSpansPerLevel)
}
if c.Flamegraph.SamplingTopLatencySpansCount < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.flamegraph.top_latency_count cannot be negative, got %d", c.Flamegraph.SamplingTopLatencySpansCount)
}
if c.Flamegraph.SamplingBucketCount <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.flamegraph.bucket_count must be positive, got %d", c.Flamegraph.SamplingBucketCount)
}
if c.Flamegraph.SelectAllSpansLimit == 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput,
"tracedetail.flamegraph.max_limit_to_select_all_spans must be positive")
}
return nil
}

View File

@@ -80,19 +80,3 @@ func (h *handler) GetTraceAggregations(rw http.ResponseWriter, r *http.Request)
render.Success(rw, http.StatusOK, result)
}
func (h *handler) GetFlamegraph(rw http.ResponseWriter, r *http.Request) {
req := new(spantypes.PostableFlamegraph)
if err := binding.JSON.BindBody(r.Body, req); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.GetFlamegraph(r.Context(), mux.Vars(r)["traceID"], req.SelectedSpanID, req.SelectFields)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}

View File

@@ -7,7 +7,6 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/tracedetail"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type module struct {
@@ -149,17 +148,6 @@ func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *
return &spantypes.GettableTraceAggregations{Aggregations: results}, nil
}
func (m *module) GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
if summary.NumSpans <= uint64(m.config.Flamegraph.SelectAllSpansLimit) {
return m.getFullFlamegraph(ctx, traceID, summary, selectFields)
}
return m.getWindowedFlamegraph(ctx, traceID, selectedSpanID, summary, selectFields)
}
// getWindowedWaterfall builds the waterfall tree with minimal data and then returns only a window of full spans.
func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpanID string, uncollapsedSpans []string, start, end time.Time) (*spantypes.GettableWaterfallTrace, error) {
// Step 1: minimal fetch → build full tree → select visible window
@@ -200,47 +188,3 @@ func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpan
waterfallTrace, selectedSpans, uncollapsedSpans, false, nil,
), nil
}
func (m *module) getFullFlamegraph(ctx context.Context, traceID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, nil)
if err != nil {
return nil, err
}
if len(fullSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
flamegraphTrace := spantypes.NewFlamegraphTraceFromStorable(fullSpans, selectFields)
return spantypes.NewGettableFlamegraphTrace(flamegraphTrace.GetAllLevels(), summary.Start.UnixMilli(), summary.End.UnixMilli(), false), nil
}
// getWindowedFlamegraph returns a window of a max levels and max sampled spans per level around the selected span.
func (m *module) getWindowedFlamegraph(ctx context.Context, traceID, selectedSpanID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
minimalSpans, err := m.store.GetMinimalSpans(ctx, traceID, summary.Start, summary.End)
if err != nil {
return nil, err
}
if len(minimalSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
flamegraphTrace := spantypes.NewFlamegraphTraceFromMinimal(minimalSpans)
minimalSpans = nil //nolint:ineffassign,wastedassign // release backing array before further db calls
cfg := m.config.Flamegraph
selectedSpans := flamegraphTrace.GetSelectedLevels(selectedSpanID, cfg.MaxSelectedLevels, cfg.MaxSpansPerLevel, cfg.SamplingTopLatencySpansCount, cfg.SamplingBucketCount)
if len(selectedSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, spantypes.FlamegraphWindowSpanIDs(selectedSpans))
if err != nil {
return nil, err
}
return spantypes.NewGettableFlamegraphTrace(
flamegraphTrace.EnrichSelectedSpans(selectedSpans, fullSpans, selectFields),
summary.Start.UnixMilli(),
summary.End.UnixMilli(),
true,
), nil
}

View File

@@ -154,46 +154,6 @@ func (s *traceStore) GetTraceSpansByIDs(ctx context.Context, traceID string, sta
return spans, nil
}
func (s *traceStore) GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]spantypes.StorableSpan, error) {
sb := sqlbuilder.NewSelectBuilder()
sb.Select(
"span_id",
"any(parent_span_id) AS parent_span_id",
"any(timestamp) AS timestamp",
"any(duration_nano) AS duration_nano",
"any(has_error) AS has_error",
"any(name) AS name",
"any(events) AS events",
"any(attributes_string) AS attributes_string",
"any(attributes_number) AS attributes_number",
"any(attributes_bool) AS attributes_bool",
"any(resources_string) AS resources_string",
)
sb.From(fmt.Sprintf("%s.%s", spantypes.TraceDB, spantypes.TraceTable))
conditions := []string{
sb.E("trace_id", traceID),
sb.GE("ts_bucket_start", start.Unix()-1800),
sb.LE("ts_bucket_start", end.Unix()),
}
if len(spanIDs) > 0 {
ids := make([]any, len(spanIDs))
for i, id := range spanIDs {
ids[i] = id
}
conditions = append(conditions, sb.In("span_id", ids...))
}
sb.Where(conditions...)
sb.GroupBy("span_id")
sb.OrderBy("timestamp ASC", "name ASC")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
var spans []spantypes.StorableSpan
if err := s.telemetryStore.ClickhouseDB().Select(ctx, &spans, query, args...); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "error querying flamegraph spans")
}
return spans, nil
}
func (s *traceStore) GetSpanCountByField(ctx context.Context, traceID string, summary *spantypes.TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error) {
fieldExpr, err := buildFieldExpr(fieldKey)
if err != nil {

View File

@@ -5,7 +5,6 @@ import (
"net/http"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// Handler exposes HTTP handlers for trace detail APIs.
@@ -13,7 +12,6 @@ type Handler interface {
GetWaterfall(http.ResponseWriter, *http.Request)
GetWaterfallV4(http.ResponseWriter, *http.Request)
GetTraceAggregations(http.ResponseWriter, *http.Request)
GetFlamegraph(http.ResponseWriter, *http.Request)
}
// Module defines the business logic for trace detail operations.
@@ -21,5 +19,4 @@ type Module interface {
GetWaterfall(ctx context.Context, traceID string, req *spantypes.PostableWaterfall) (*spantypes.GettableWaterfallTrace, error)
GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string, selectAllLimit uint) (*spantypes.GettableWaterfallTrace, error)
GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error)
GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error)
}

View File

@@ -124,8 +124,10 @@ func (b *traceQueryStatementBuilder) Build(
-------------------------------- End of tech debt ----------------------------
*/
query = b.adjustKeys(ctx, keys, query, requestType)
for _, action := range adjustTraceKeys(keys, &query, requestType) {
// TODO: change to debug level once we are confident about the behavior
b.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
// Create SQL builder
q := sqlbuilder.NewSelectBuilder()
@@ -193,24 +195,30 @@ func getKeySelectors(query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation])
return keySelectors
}
func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[string][]*telemetrytypes.TelemetryFieldKey, query qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation], requestType qbtypes.RequestType) qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation] {
// add deprecated fields only during statement building
// why?
// 1. to not fail filter expression that use deprecated cols
// 2. this could have been moved to metadata fetching itself, however, that
// would mean, they also show up in suggestions we we don't want to do
// 3. reason for not doing a simple append is to keep intrinsic/calculated field first so that it gets
// priority in multi_if sql expression
// mergeDeprecatedTraceKeys prepends deprecated intrinsic/calculated trace field
// definitions to the keys map. We do this during statement building, not at
// metadata fetch time, because:
// 1. Filter expressions that reference deprecated columns must continue to
// resolve — otherwise they fail with "key not found".
// 2. Doing it at metadata fetch time would also surface deprecated keys in
// autocomplete suggestions, which we don't want.
// 3. We prepend (not append) so the intrinsic/calculated entry wins ordering
// in the multi_if SQL expression.
func mergeDeprecatedTraceKeys(keys map[string][]*telemetrytypes.TelemetryFieldKey) {
for fieldKeyName, fieldKey := range IntrinsicFieldsDeprecated {
keys[fieldKeyName] = append([]*telemetrytypes.TelemetryFieldKey{&fieldKey}, keys[fieldKeyName]...)
}
for fieldKeyName, fieldKey := range CalculatedFieldsDeprecated {
keys[fieldKeyName] = append([]*telemetrytypes.TelemetryFieldKey{&fieldKey}, keys[fieldKeyName]...)
}
}
func adjustTraceKeys(keys map[string][]*telemetrytypes.TelemetryFieldKey, query *qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation], requestType qbtypes.RequestType) []string {
mergeDeprecatedTraceKeys(keys)
// Adjust keys for alias expressions in aggregations
actions := querybuilder.AdjustKeysForAliasExpressions(&query, requestType)
actions := querybuilder.AdjustKeysForAliasExpressions(query, requestType)
/*
Check if user is using multiple contexts or data types for same field name
@@ -228,7 +236,7 @@ func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[st
and make it just http.status_code and remove the duplicate entry.
*/
actions = append(actions, querybuilder.AdjustDuplicateKeys(&query)...)
actions = append(actions, querybuilder.AdjustDuplicateKeys(query)...)
/*
Now adjust each key to have correct context and data type
@@ -236,24 +244,20 @@ func (b *traceQueryStatementBuilder) adjustKeys(ctx context.Context, keys map[st
Reason for doing this is to not create an unexpected behavior for users
*/
for idx := range query.SelectFields {
actions = append(actions, b.adjustKey(&query.SelectFields[idx], keys)...)
actions = append(actions, adjustTraceKey(&query.SelectFields[idx], keys)...)
}
for idx := range query.GroupBy {
actions = append(actions, b.adjustKey(&query.GroupBy[idx].TelemetryFieldKey, keys)...)
actions = append(actions, adjustTraceKey(&query.GroupBy[idx].TelemetryFieldKey, keys)...)
}
for idx := range query.Order {
actions = append(actions, b.adjustKey(&query.Order[idx].Key.TelemetryFieldKey, keys)...)
actions = append(actions, adjustTraceKey(&query.Order[idx].Key.TelemetryFieldKey, keys)...)
}
for _, action := range actions {
// TODO: change to debug level once we are confident about the behavior
b.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
return query
return actions
}
func (b *traceQueryStatementBuilder) adjustKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemetrytypes.TelemetryFieldKey) []string {
// adjustTraceKey resolves a single TelemetryFieldKey against the keys map.
func adjustTraceKey(key *telemetrytypes.TelemetryFieldKey, keys map[string][]*telemetrytypes.TelemetryFieldKey) []string {
// for recording actions taken
actions := []string{}

View File

@@ -1125,28 +1125,13 @@ func TestAdjustKey(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
statementBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a copy of the input key to avoid modifying the original
key := c.inputKey
// Call adjustKey
statementBuilder.adjustKey(&key, c.keysMap)
adjustTraceKey(&key, c.keysMap)
// Verify the key was adjusted as expected
require.Equal(t, c.expectedKey.Name, key.Name, "key name should match")
@@ -1399,21 +1384,6 @@ func TestAdjustKeys(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
statementBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// Create a deep copy of the keys map to avoid modifying the original
@@ -1424,7 +1394,7 @@ func TestAdjustKeys(t *testing.T) {
}
// Call adjustKeys
c.query = statementBuilder.adjustKeys(context.Background(), keysMapCopy, c.query, qbtypes.RequestTypeScalar)
adjustTraceKeys(keysMapCopy, &c.query, qbtypes.RequestTypeScalar)
// Verify select fields were adjusted
if c.expectedSelectFields != nil {

View File

@@ -216,6 +216,13 @@ func (b *traceOperatorCTEBuilder) buildQueryCTE(ctx context.Context, queryName s
}
b.stmtBuilder.logger.DebugContext(ctx, "Retrieved keys for query", slog.String("query_name", queryName), slog.Int("keys_count", len(keys)))
// The CTE only selects spans matching the filter. Aggregations, group by
// and order by run later in buildFinalQuery, so RequestTypeRaw is fine here.
for _, action := range adjustTraceKeys(keys, query, qbtypes.RequestTypeRaw) {
// TODO: change to debug level once we are confident about the behavior
b.stmtBuilder.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
// Build resource filter CTE for this specific query
resourceFilterCTEName := fmt.Sprintf("__resource_filter_%s", cteName)
resourceStmt, err := b.buildResourceFilterCTE(ctx, *query)
@@ -417,21 +424,28 @@ func (b *traceOperatorCTEBuilder) buildNotCTE(leftCTE, rightCTE string) (string,
}
func (b *traceOperatorCTEBuilder) buildFinalQuery(ctx context.Context, selectFromCTE string, requestType qbtypes.RequestType) (*qbtypes.Statement, error) {
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
b.adjustOperatorKeys(ctx, keys, requestType)
switch requestType {
case qbtypes.RequestTypeRaw:
return b.buildListQuery(ctx, selectFromCTE)
return b.buildListQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTimeSeries:
return b.buildTimeSeriesQuery(ctx, selectFromCTE)
return b.buildTimeSeriesQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeTrace:
return b.buildTraceQuery(ctx, selectFromCTE)
return b.buildTraceQuery(ctx, selectFromCTE, keys)
case qbtypes.RequestTypeScalar:
return b.buildScalarQuery(ctx, selectFromCTE)
return b.buildScalarQuery(ctx, selectFromCTE, keys)
default:
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported request type: %s", requestType)
}
}
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
// Select core fields
@@ -453,22 +467,6 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
"parent_span_id": true,
}
// Get keys for selectFields
keySelectors := b.getKeySelectors()
for _, field := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: field.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: field.FieldContext,
FieldDataType: field.FieldDataType,
})
}
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
// Add selectFields using ColumnExpressionFor since we now have all base table columns
for _, field := range b.operator.SelectFields {
if selectedFields[field.Name] {
@@ -518,6 +516,44 @@ func (b *traceOperatorCTEBuilder) buildListQuery(ctx context.Context, selectFrom
}, nil
}
// adjustOperatorKeys runs the same key adjustments as adjustTraceKeys, but on
// the operator's own fields. The operator has a different struct shape than
// QueryBuilderQuery, so we copy the relevant fields into a temp query, run
// the shared helpers, and copy the results back.
func (b *traceOperatorCTEBuilder) adjustOperatorKeys(ctx context.Context, keys map[string][]*telemetrytypes.TelemetryFieldKey, requestType qbtypes.RequestType) {
mergeDeprecatedTraceKeys(keys)
tmp := qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Aggregations: b.operator.Aggregations,
SelectFields: b.operator.SelectFields,
GroupBy: b.operator.GroupBy,
Order: b.operator.Order,
}
actions := querybuilder.AdjustKeysForAliasExpressions(&tmp, requestType)
actions = append(actions, querybuilder.AdjustDuplicateKeys(&tmp)...)
for idx := range tmp.SelectFields {
actions = append(actions, adjustTraceKey(&tmp.SelectFields[idx], keys)...)
}
for idx := range tmp.GroupBy {
actions = append(actions, adjustTraceKey(&tmp.GroupBy[idx].TelemetryFieldKey, keys)...)
}
for idx := range tmp.Order {
actions = append(actions, adjustTraceKey(&tmp.Order[idx].Key.TelemetryFieldKey, keys)...)
}
// Copy back the slices the helpers can rewrite.
b.operator.Aggregations = tmp.Aggregations
b.operator.SelectFields = tmp.SelectFields
b.operator.GroupBy = tmp.GroupBy
b.operator.Order = tmp.Order
for _, action := range actions {
b.stmtBuilder.logger.InfoContext(ctx, "key adjustment action", slog.String("action", action))
}
}
func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySelector {
var keySelectors []*telemetrytypes.FieldKeySelector
@@ -545,6 +581,15 @@ func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySe
})
}
for _, sf := range b.operator.SelectFields {
keySelectors = append(keySelectors, &telemetrytypes.FieldKeySelector{
Name: sf.Name,
Signal: telemetrytypes.SignalTraces,
FieldContext: sf.FieldContext,
FieldDataType: sf.FieldDataType,
})
}
for i := range keySelectors {
keySelectors[i].Signal = telemetrytypes.SignalTraces
}
@@ -552,7 +597,7 @@ func (b *traceOperatorCTEBuilder) getKeySelectors() []*telemetrytypes.FieldKeySe
return keySelectors
}
func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
sb.Select(fmt.Sprintf(
@@ -560,12 +605,6 @@ func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, sele
int64(b.operator.StepInterval.Seconds()),
))
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -644,8 +683,7 @@ func (b *traceOperatorCTEBuilder) buildTimeSeriesQuery(ctx context.Context, sele
combinedArgs := append(allGroupByArgs, allAggChArgs...)
// Add HAVING clause if specified
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}
@@ -672,17 +710,11 @@ func (b *traceOperatorCTEBuilder) buildTraceSummaryCTE(selectFromCTE string) {
b.addCTE("trace_summary", sql, args, []string{"all_spans", selectFromCTE})
}
func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
b.buildTraceSummaryCTE(selectFromCTE)
sb := sqlbuilder.NewSelectBuilder()
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -764,8 +796,7 @@ func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFro
sb.GroupBy(groupByKeys...)
}
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}
@@ -821,15 +852,9 @@ func (b *traceOperatorCTEBuilder) buildTraceQuery(ctx context.Context, selectFro
}, nil
}
func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFromCTE string) (*qbtypes.Statement, error) {
func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFromCTE string, keys map[string][]*telemetrytypes.TelemetryFieldKey) (*qbtypes.Statement, error) {
sb := sqlbuilder.NewSelectBuilder()
keySelectors := b.getKeySelectors()
keys, _, err := b.stmtBuilder.metadataStore.GetKeysMulti(ctx, keySelectors)
if err != nil {
return nil, err
}
var allGroupByArgs []any
for _, gb := range b.operator.GroupBy {
@@ -911,8 +936,7 @@ func (b *traceOperatorCTEBuilder) buildScalarQuery(ctx context.Context, selectFr
combinedArgs := append(allGroupByArgs, allAggChArgs...)
// Add HAVING clause if specified
err = b.addHavingClause(sb)
if err != nil {
if err := b.addHavingClause(sb); err != nil {
return nil, err
}

View File

@@ -14,6 +14,24 @@ import (
"github.com/stretchr/testify/require"
)
func newTestTraceOperatorStatementBuilder(t *testing.T) *traceOperatorStatementBuilder {
t.Helper()
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore, fm, cb, aggExprRewriter, nil, fl,
)
return NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore, fm, cb, traceStmtBuilder, aggExprRewriter, fl,
)
}
func TestTraceOperatorStatementBuilder(t *testing.T) {
cases := []struct {
name string
@@ -463,32 +481,7 @@ func TestTraceOperatorStatementBuilder(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
statementBuilder := NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
traceStmtBuilder,
aggExprRewriter,
fl,
)
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -579,32 +572,7 @@ func TestTraceOperatorStatementBuilderErrors(t *testing.T) {
},
}
fm := NewFieldMapper()
cb := NewConditionBuilder(fm)
mockMetadataStore := telemetrytypestest.NewMockMetadataStore()
mockMetadataStore.KeysMap = buildCompleteFieldKeyMap()
fl := flaggertest.New(t)
aggExprRewriter := querybuilder.NewAggExprRewriter(instrumentationtest.New().ToProviderSettings(), nil, fm, cb, nil, fl)
traceStmtBuilder := NewTraceQueryStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
aggExprRewriter,
nil,
fl,
)
statementBuilder := NewTraceOperatorStatementBuilder(
instrumentationtest.New().ToProviderSettings(),
mockMetadataStore,
fm,
cb,
traceStmtBuilder,
aggExprRewriter,
fl,
)
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
@@ -626,3 +594,142 @@ func TestTraceOperatorStatementBuilderErrors(t *testing.T) {
})
}
}
func TestTraceOperatorStatementBuilderAdjustsKeys(t *testing.T) {
cases := []struct {
name string
requestType qbtypes.RequestType
operator qbtypes.QueryBuilderTraceOperator
builderFilter string
wantSQL string
wantArgs []any
}{
{
name: "deprecated duration filter in referenced builder query",
requestType: qbtypes.RequestTypeRaw,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Limit: 10,
},
builderFilter: "durationNano = '3s'",
wantSQL: "duration_nano = ?",
wantArgs: []any{int64(3000000000)},
},
{
name: "context-prefixed aggregation alias in order by",
requestType: qbtypes.RequestTypeScalar,
operator: qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Aggregations: []qbtypes.TraceAggregation{
{
Expression: "count()",
Alias: "span.count_",
},
},
Order: []qbtypes.OrderBy{
{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "count_",
FieldContext: telemetrytypes.FieldContextSpan,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
},
wantSQL: "ORDER BY __result_0 desc",
},
}
statementBuilder := newTestTraceOperatorStatementBuilder(t)
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
err := c.operator.ParseExpression()
require.NoError(t, err)
filter := c.builderFilter
if filter == "" {
filter = "service.name = 'frontend'"
}
q, err := statementBuilder.Build(
context.Background(),
1747947419000,
1747983448000,
c.requestType,
c.operator,
&qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{Expression: filter},
},
},
},
},
)
require.NoError(t, err)
require.Contains(t, q.Query, c.wantSQL)
for _, arg := range c.wantArgs {
require.Contains(t, q.Args, arg)
}
})
}
}
// TestTraceOperatorStatementBuilderDeduplicatesKeys checks that a trace
// operator with the same field name listed twice in GroupBy (once with a
// context, once without) ends up with a single column in the outer SELECT
// and a single entry in GROUP BY.
func TestTraceOperatorStatementBuilderDeduplicatesKeys(t *testing.T) {
statementBuilder := newTestTraceOperatorStatementBuilder(t)
operator := qbtypes.QueryBuilderTraceOperator{
Expression: "A",
Aggregations: []qbtypes.TraceAggregation{
{Expression: "count()"},
},
GroupBy: []qbtypes.GroupByKey{
{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.method",
FieldContext: telemetrytypes.FieldContextAttribute,
}},
// Same name, no context — should be merged with the entry above.
{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "http.method",
}},
},
}
require.NoError(t, operator.ParseExpression())
q, err := statementBuilder.Build(
context.Background(),
1747947419000,
1747983448000,
qbtypes.RequestTypeScalar,
operator,
&qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
Name: "A",
Signal: telemetrytypes.SignalTraces,
Filter: &qbtypes.Filter{Expression: "service.name = 'frontend'"},
},
},
},
},
)
require.NoError(t, err)
require.Contains(t, q.Query,
"SELECT toString(multiIf(mapContains(attributes_string, 'http.method') = ?, attributes_string['http.method'], NULL)) AS `http.method`, count() AS __result_0 FROM A GROUP BY `http.method` ORDER BY __result_0 DESC")
}

View File

@@ -1,95 +0,0 @@
package spantypes
import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type FlamegraphSpan struct {
SpanID string `json:"spanId"`
ParentSpanID string `json:"parentSpanId"`
Timestamp uint64 `json:"timestamp"`
DurationNano uint64 `json:"durationNano"`
HasError bool `json:"hasError"`
Name string `json:"name"`
Level int64 `json:"level"`
Events []Event `json:"event" required:"true" nullable:"false"`
Attributes map[string]any `json:"attributes,omitempty"`
Resource map[string]string `json:"resource,omitempty"`
Children []*FlamegraphSpan `json:"-"` // internal tree use only
}
// FlamegraphLevel groups span IDs at a single level within the selected window.
type FlamegraphLevel struct {
Level int64
SpanIDs []string
}
type PostableFlamegraph struct {
SelectedSpanID string `json:"selectedSpanId"`
SelectFields []telemetrytypes.TelemetryFieldKey `json:"selectFields,omitempty"`
}
// GettableFlamegraphTrace is the response for the v3 flamegraph API.
type GettableFlamegraphTrace struct {
Spans [][]*FlamegraphSpan `json:"spans" required:"true" nullable:"false"`
StartTimestampMillis int64 `json:"startTimestampMillis" required:"true"`
EndTimestampMillis int64 `json:"endTimestampMillis" required:"true"`
HasMore bool `json:"hasMore" required:"true"`
}
func NewGettableFlamegraphTrace(spans [][]*FlamegraphSpan, startMs, endMs int64, hasMore bool) *GettableFlamegraphTrace {
return &GettableFlamegraphTrace{
Spans: spans,
StartTimestampMillis: startMs,
EndTimestampMillis: endMs,
HasMore: hasMore,
}
}
func NewFlamegraphSpanFromStorable(s *StorableSpan, level int64, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphSpan {
span := &FlamegraphSpan{
SpanID: s.SpanID,
ParentSpanID: s.ParentSpanID,
Timestamp: uint64(s.StartTime.UnixNano()),
DurationNano: s.DurationNano,
HasError: s.HasError,
Name: s.Name,
Level: level,
Events: s.UnmarshalledEvents(),
}
if len(selectFields) == 0 {
return span
}
for _, field := range selectFields {
switch field.FieldContext {
case telemetrytypes.FieldContextResource:
if v, ok := s.ResourcesString[field.Name]; ok && v != "" {
if span.Resource == nil {
span.Resource = make(map[string]string)
}
span.Resource[field.Name] = v
}
case telemetrytypes.FieldContextAttribute:
if v := s.AttributeValue(field.Name); v != nil {
if span.Attributes == nil {
span.Attributes = make(map[string]any)
}
span.Attributes[field.Name] = v
}
}
}
return span
}
// FlamegraphWindowSpanIDs collects all span IDs from a level window into a flat slice.
func FlamegraphWindowSpanIDs(window []FlamegraphLevel) []string {
total := 0
for _, lvl := range window {
total += len(lvl.SpanIDs)
}
ids := make([]string, 0, total)
for _, lvl := range window {
ids = append(ids, lvl.SpanIDs...)
}
return ids
}

View File

@@ -1,117 +0,0 @@
package spantypes
import (
"sort"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// FlamegraphTrace holds the level wise tree built from minimal spans.
type FlamegraphTrace struct {
roots []*FlamegraphSpan
nodeByID map[string]*FlamegraphSpan
startTime uint64
endTime uint64
}
func NewFlamegraphTraceFromMinimal(spans []MinimalSpan) *FlamegraphTrace {
t := &FlamegraphTrace{
nodeByID: make(map[string]*FlamegraphSpan, len(spans)),
}
for i := range spans {
node := spans[i].ToFlamegraphSpan()
t.updateTimeRange(node.Timestamp, node.DurationNano)
t.nodeByID[node.SpanID] = node
}
t.buildSpanTree()
return t
}
func NewFlamegraphTraceFromStorable(spans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphTrace {
t := &FlamegraphTrace{
nodeByID: make(map[string]*FlamegraphSpan, len(spans)),
}
for i := range spans {
node := NewFlamegraphSpanFromStorable(&spans[i], 0, selectFields) // level is set later by BFS
t.updateTimeRange(node.Timestamp, node.DurationNano)
t.nodeByID[node.SpanID] = node
}
t.buildSpanTree()
return t
}
func (t *FlamegraphTrace) GetAllLevels() [][]*FlamegraphSpan {
return nil
}
// GetSelectedLevels returns the window of levels around selectedSpanID with sampling applied to dense levels.
func (t *FlamegraphTrace) GetSelectedLevels(selectedSpanID string, levelLimit, spansPerLevel, topLatencyCount, bucketCount int) []FlamegraphLevel {
return nil
}
func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, fullSpans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) [][]*FlamegraphSpan {
fullByID := make(map[string]*StorableSpan, len(fullSpans))
for i := range fullSpans {
fullByID[fullSpans[i].SpanID] = &fullSpans[i]
}
result := make([][]*FlamegraphSpan, len(selectedSpans))
for i, lvl := range selectedSpans {
result[i] = make([]*FlamegraphSpan, 0, len(lvl.SpanIDs))
for _, spanID := range lvl.SpanIDs {
if full, ok := fullByID[spanID]; ok {
result[i] = append(result[i], NewFlamegraphSpanFromStorable(full, lvl.Level, selectFields))
} else if lean, ok := t.nodeByID[spanID]; ok {
result[i] = append(result[i], lean)
}
}
}
return result
}
func (t *FlamegraphTrace) updateTimeRange(timestamp, durationNano uint64) {
if t.startTime == 0 || timestamp < t.startTime {
t.startTime = timestamp
}
if end := timestamp + durationNano; end > t.endTime {
t.endTime = end
}
}
func (t *FlamegraphTrace) buildSpanTree() {
for _, node := range t.nodeByID {
if node.ParentSpanID != "" {
if parent, ok := t.nodeByID[node.ParentSpanID]; ok {
parent.Children = append(parent.Children, node)
} else {
missing := &FlamegraphSpan{
SpanID: node.ParentSpanID,
Name: "Missing Span",
Timestamp: node.Timestamp,
DurationNano: node.DurationNano,
Children: []*FlamegraphSpan{node},
}
t.nodeByID[missing.SpanID] = missing
t.roots = append(t.roots, missing)
}
} else if flamegraphSpanIndex(t.roots, node.SpanID) == -1 {
t.roots = append(t.roots, node)
}
}
sort.Slice(t.roots, func(i, j int) bool {
if t.roots[i].Timestamp == t.roots[j].Timestamp {
return t.roots[i].SpanID < t.roots[j].SpanID
}
return t.roots[i].Timestamp < t.roots[j].Timestamp
})
}
func flamegraphSpanIndex(spans []*FlamegraphSpan, spanID string) int {
for i, s := range spans {
if s != nil && s.SpanID == spanID {
return i
}
}
return -1
}

View File

@@ -30,7 +30,6 @@ type TraceStore interface {
GetTraceSpans(ctx context.Context, traceID string, summary *TraceSummary) ([]StorableSpan, error)
GetMinimalSpans(ctx context.Context, traceID string, start, end time.Time) ([]MinimalSpan, error)
GetTraceSpansByIDs(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetSpanCountByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)
GetSpanDurationByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)

View File

@@ -164,17 +164,6 @@ func (item *MinimalSpan) ToWaterfallSpan(traceID string) *WaterfallSpan {
}
}
func (item *MinimalSpan) ToFlamegraphSpan() *FlamegraphSpan {
return &FlamegraphSpan{
SpanID: item.SpanID,
ParentSpanID: item.ParentSpanID,
Timestamp: uint64(item.StartTime.UnixNano()),
DurationNano: item.DurationNano,
HasError: item.HasError,
Children: make([]*FlamegraphSpan, 0),
}
}
// NewMissingWaterfallSpan creates a synthetic placeholder span for a parent that has no recorded data.
func NewMissingWaterfallSpan(spanID, traceID string, timeUnixNano, durationNano uint64) *WaterfallSpan {
return &WaterfallSpan{
@@ -278,19 +267,6 @@ func (ws *WaterfallSpan) getPathToSelectedSpanID(selectedSpanID string) ([]strin
return nil, false
}
func (item *StorableSpan) AttributeValue(name string) any {
if v, ok := item.AttributesString[name]; ok {
return v
}
if v, ok := item.AttributesNumber[name]; ok {
return v
}
if v, ok := item.AttributesBool[name]; ok {
return v
}
return nil
}
func (item *StorableSpan) Attributes() map[string]any {
attributes := make(map[string]any, len(item.AttributesString)+len(item.AttributesNumber)+len(item.AttributesBool))
for k, v := range item.AttributesString {

View File

@@ -459,6 +459,57 @@ def find_named_result(
)
def assert_scalar_value(
response: requests.Response,
name: str,
expected: Any,
*,
row: int = 0,
col: int = 0,
) -> None:
"""Assert that the named scalar result has `expected` at data[row][col]."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
assert result["data"][row][col] == expected, f"expected {expected} at [{row}][{col}], got {result['data'][row][col]}"
def assert_grouped_scalar(
response: requests.Response,
name: str,
*,
expected_groups: int,
expected_columns: int,
last_col_value: Any | None = None,
) -> None:
"""Assert grouped scalar result has the expected column count and group count.
If `last_col_value` is set and there is exactly one group, also assert the
last column of that single row equals it (a common aggregation-value check)."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
columns = result["columns"]
rows = result["data"]
assert len(columns) == expected_columns, f"expected {expected_columns} columns, got {len(columns)}: {columns}"
assert len(rows) == expected_groups, f"expected {expected_groups} groups, got {len(rows)}: {rows}"
if last_col_value is not None and expected_groups == 1:
assert rows[0][-1] == last_col_value, f"expected last col {last_col_value}, got row {rows[0]}"
def assert_raw_row_subset(
response: requests.Response,
name: str,
expected: dict[str, Any],
*,
row: int = 0,
) -> None:
"""Assert that the named raw result's rows[row]['data'] is a superset of `expected`."""
result = find_named_result(response.json()["data"]["data"]["results"], name)
assert result is not None, f"no result for query {name}"
rows = result["rows"]
assert rows is not None, f"no rows for query {name}"
data = rows[row]["data"]
assert expected.items() <= data.items(), f"expected subset {expected}, got data {data}"
def build_scalar_query(
name: str,
signal: str,

View File

@@ -25,13 +25,22 @@ returnSpansFrom="A"
from collections.abc import Callable
from datetime import UTC, datetime, timedelta
from http import HTTPStatus
from typing import Any
import pytest
import requests
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.querier import get_rows
from fixtures.querier import (
assert_grouped_scalar,
assert_raw_row_subset,
assert_scalar_value,
format_timestamp,
generate_traces_with_corrupt_metadata,
get_rows,
make_query_request,
)
from fixtures.traces import TraceIdGenerator, Traces, TracesKind, TracesStatusCode
@@ -434,3 +443,173 @@ def test_trace_operator(
)
assert response.status_code == HTTPStatus.OK, f"HTTP {response.status_code}: {response.text}"
assert case["validate"](response), f"validation failed: {response.json()}"
def _expected_trace_subset(trace: Traces) -> dict[str, Any]:
return {
"duration_nano": trace.duration_nano,
"name": trace.name,
"parent_span_id": trace.parent_span_id,
"span_id": trace.span_id,
"timestamp": format_timestamp(trace.timestamp),
"trace_id": trace.trace_id,
}
@pytest.mark.parametrize(
"payload_factory,request_type,assert_result",
[
# Case 1: CTE filter uses the deprecated intrinsic field `durationNano`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": 'durationNano = "3s"'},
},
},
{
"type": "builder_query",
"spec": {
"name": "B",
"signal": "traces",
"filter": {"expression": 'durationNano = "5s"'},
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A => B",
"limit": 1,
},
},
],
"raw",
lambda response, traces: assert_raw_row_subset(response, "C", _expected_trace_subset(traces[0])),
id="deprecated-intrinsic-filter",
),
# Case 2: CTE filter uses the deprecated calculated field `responseStatusCode`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"filter": {"expression": 'responseStatusCode = "200"'},
},
},
{
"type": "builder_query",
"spec": {
"name": "B",
"signal": "traces",
"filter": {"expression": 'durationNano = "5s"'},
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A => B",
"limit": 1,
},
},
],
"raw",
lambda response, traces: assert_raw_row_subset(response, "C", _expected_trace_subset(traces[0])),
id="deprecated-calculated-filter",
),
# Case 3: order by uses `count_` with fieldContext `span`, which has
# to be rewritten to the aggregation alias `span.count_`.
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"aggregations": [{"expression": "count()"}],
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A",
"aggregations": [{"expression": "count()", "alias": "span.count_"}],
"order": [{"key": {"name": "count_", "fieldContext": "span"}, "direction": "desc"}],
},
},
],
"scalar",
lambda response, traces: assert_scalar_value(response, "C", len(traces)),
id="context-prefixed-aggregation-alias-order",
),
# Case 4: group by lists `cloud.provider` twice (once with a resource
# context, once without).
pytest.param(
lambda traces: [
{
"type": "builder_query",
"spec": {
"name": "A",
"signal": "traces",
"disabled": True,
"aggregations": [{"expression": "count()"}],
},
},
{
"type": "builder_trace_operator",
"spec": {
"name": "C",
"expression": "A",
"aggregations": [{"expression": "count()"}],
"groupBy": [
{"name": "cloud.provider", "fieldContext": "resource"},
{"name": "cloud.provider"},
],
},
},
],
"scalar",
lambda response, traces: assert_grouped_scalar(response, "C", expected_groups=1, expected_columns=2, last_col_value=len(traces)),
id="duplicate-group-by-deduplicated",
),
],
)
def test_trace_operator_with_adjusted_keys(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
insert_traces: Callable[[list[Traces]], None],
payload_factory: Callable[[list[Traces]], list[dict[str, Any]]],
request_type: str,
assert_result: Callable[[requests.Response, list[Traces]], None],
) -> None:
"""
Trace operators build a CTE per referenced builder query and an outer
query on top. Both layers need the same key adjustment as regular trace
queries, otherwise deprecated keys and context-prefixed aliases don't
resolve.
"""
traces = generate_traces_with_corrupt_metadata()
insert_traces(traces)
payload = payload_factory(traces)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
response = make_query_request(
signoz,
token,
start_ms=int((datetime.now(tz=UTC) - timedelta(minutes=5)).timestamp() * 1000),
end_ms=int(datetime.now(tz=UTC).timestamp() * 1000),
request_type=request_type,
queries=payload,
)
assert response.status_code == HTTPStatus.OK, response.text
assert_result(response, traces)