Compare commits

...

7 Commits

Author SHA1 Message Date
Nikhil Soni
3c85421f23 chore: remove flamegraph v2 since we've moved to v4 now
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-09 22:15:29 +05:30
Nikhil Soni
bcacc3d8d7 chore: remove waterfall v2 since we've moved to v4 now 2026-06-09 19:38:07 +05:30
Nikhil Soni
51a7789a29 chore: update openapi specs 2026-06-09 19:22:56 +05:30
Nikhil Soni
19078a9c7f chore: remove select all limit from waterfall request
It was added for testing only and fine tuning the limit value
2026-06-09 19:19:26 +05:30
Nikhil Soni
5a755f54b5 chore: cleanup waterfall v3 api
We've moved to v4 as part of memory allocation optimisation
2026-06-09 19:19:26 +05:30
Aditya Singh
faad1e659a feat: badge color fix (#11625) 2026-06-09 13:37:01 +00:00
Nikhil Soni
e10a9515ef feat(trace-details): add implementation for flamegraph building (part 2) (#11491)
* feat: add config for flamegraph

* Revert "chore: extract out flamegraph building logic for easier review"

This reverts commit f9222c9930.

* chore: remove unwanted absctraction

* refactor: simplify GetSelectedLevels on flamegraph

* refactor: simplify sampling method

* refactor: simplify BFS in getAllLevels method

* chore: fix config key in error messages

* feat: add metrics for flamegraph

* fix: config keys for flamegraph

* fix: handle duplicate spans in flamegraph

* test: add for GetAllLevels and GetSelectedLevel

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-09 13:29:32 +00:00
26 changed files with 447 additions and 2260 deletions

View File

@@ -6724,11 +6724,6 @@ components:
type: object
SpantypesGettableWaterfallTrace:
properties:
aggregations:
items:
$ref: '#/components/schemas/SpantypesSpanAggregationResult'
nullable: true
type: array
endTimestampMillis:
minimum: 0
type: integer
@@ -6816,14 +6811,6 @@ components:
type: object
SpantypesPostableWaterfall:
properties:
aggregations:
items:
$ref: '#/components/schemas/SpantypesSpanAggregation'
nullable: true
type: array
limit:
minimum: 0
type: integer
selectedSpanId:
type: string
uncollapsedSpans:
@@ -20679,76 +20666,6 @@ paths:
summary: Get flamegraph view for a trace
tags:
- tracedetail
/api/v3/traces/{traceID}/waterfall:
post:
deprecated: false
description: Returns the waterfall view of spans for a given trace ID with tree
structure, metadata, and windowed pagination
operationId: GetWaterfall
parameters:
- in: path
name: traceID
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SpantypesPostableWaterfall'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/SpantypesGettableWaterfallTrace'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Not Found
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: Get waterfall view for a trace
tags:
- tracedetail
/api/v4/traces/{traceID}/waterfall:
post:
deprecated: false

View File

@@ -8093,10 +8093,6 @@ export interface SpantypesWaterfallSpanDTO {
}
export interface SpantypesGettableWaterfallTraceDTO {
/**
* @type array,null
*/
aggregations?: SpantypesSpanAggregationResultDTO[] | null;
/**
* @type integer
* @minimum 0
@@ -8216,15 +8212,6 @@ export interface SpantypesPostableTraceAggregationsDTO {
}
export interface SpantypesPostableWaterfallDTO {
/**
* @type array,null
*/
aggregations?: SpantypesSpanAggregationDTO[] | null;
/**
* @type integer
* @minimum 0
*/
limit?: number;
/**
* @type string
*/
@@ -10519,17 +10506,6 @@ export type GetFlamegraph200 = {
status: string;
};
export type GetWaterfallPathParameters = {
traceID: string;
};
export type GetWaterfall200 = {
data: SpantypesGettableWaterfallTraceDTO;
/**
* @type string
*/
status: string;
};
export type GetWaterfallV4PathParameters = {
traceID: string;
};

View File

@@ -16,8 +16,6 @@ import type {
GetFlamegraphPathParameters,
GetTraceAggregations200,
GetTraceAggregationsPathParameters,
GetWaterfall200,
GetWaterfallPathParameters,
GetWaterfallV4200,
GetWaterfallV4PathParameters,
RenderErrorResponseDTO,
@@ -228,105 +226,6 @@ export const useGetFlamegraph = <
> => {
return useMutation(getGetFlamegraphMutationOptions(options));
};
/**
* Returns the waterfall view of spans for a given trace ID with tree structure, metadata, and windowed pagination
* @summary Get waterfall view for a trace
*/
export const getWaterfall = (
{ traceID }: GetWaterfallPathParameters,
spantypesPostableWaterfallDTO?: BodyType<SpantypesPostableWaterfallDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<GetWaterfall200>({
url: `/api/v3/traces/${traceID}/waterfall`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: spantypesPostableWaterfallDTO,
signal,
});
};
export const getGetWaterfallMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getWaterfall>>,
TError,
{
pathParams: GetWaterfallPathParameters;
data?: BodyType<SpantypesPostableWaterfallDTO>;
},
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof getWaterfall>>,
TError,
{
pathParams: GetWaterfallPathParameters;
data?: BodyType<SpantypesPostableWaterfallDTO>;
},
TContext
> => {
const mutationKey = ['getWaterfall'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof getWaterfall>>,
{
pathParams: GetWaterfallPathParameters;
data?: BodyType<SpantypesPostableWaterfallDTO>;
}
> = (props) => {
const { pathParams, data } = props ?? {};
return getWaterfall(pathParams, data);
};
return { mutationFn, ...mutationOptions };
};
export type GetWaterfallMutationResult = NonNullable<
Awaited<ReturnType<typeof getWaterfall>>
>;
export type GetWaterfallMutationBody =
| BodyType<SpantypesPostableWaterfallDTO>
| undefined;
export type GetWaterfallMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary Get waterfall view for a trace
*/
export const useGetWaterfall = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getWaterfall>>,
TError,
{
pathParams: GetWaterfallPathParameters;
data?: BodyType<SpantypesPostableWaterfallDTO>;
},
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof getWaterfall>>,
TError,
{
pathParams: GetWaterfallPathParameters;
data?: BodyType<SpantypesPostableWaterfallDTO>;
},
TContext
> => {
return useMutation(getGetWaterfallMutationOptions(options));
};
/**
* Returns the waterfall view of spans including all spans if total spans are under a limit, a max count otherwise. Aggregations are dropped compared to v3
* @summary Get waterfall view for a trace

View File

@@ -27,7 +27,6 @@ const getTraceV4 = async (
{
selectedSpanId: props.selectedSpanId,
uncollapsedSpans,
limit: 10000,
},
);

View File

@@ -22,11 +22,12 @@ export const StyledCheckOutlined = styled(Check)`
float: right;
`;
export const TagContainer = styled(Badge)`
export const TagContainer = styled(Badge).attrs({
color: 'secondary',
variant: 'outline',
})`
&&& {
display: flex;
border-radius: 3px;
padding: 0.1rem 0.2rem;
font-weight: 300;
font-size: 0.6rem;
}
@@ -38,4 +39,5 @@ export const TagLabel = styled.span`
export const TagValue = styled.span`
text-transform: capitalize;
font-weight: 400;
`;

View File

@@ -10,25 +10,6 @@ import (
)
func (provider *provider) addTraceDetailRoutes(router *mux.Router) error {
if err := router.Handle("/api/v3/traces/{traceID}/waterfall", handler.New(
provider.authzMiddleware.ViewAccess(provider.traceDetailHandler.GetWaterfall),
handler.OpenAPIDef{
ID: "GetWaterfall",
Tags: []string{"tracedetail"},
Summary: "Get waterfall view for a trace",
Description: "Returns the waterfall view of spans for a given trace ID with tree structure, metadata, and windowed pagination",
Request: new(spantypes.PostableWaterfall),
RequestContentType: "application/json",
Response: new(spantypes.GettableWaterfallTrace),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound},
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
},
)).Methods(http.MethodPost).GetError(); err != nil {
return err
}
if err := router.Handle("/api/v4/traces/{traceID}/waterfall", handler.New(
provider.authzMiddleware.ViewAccess(provider.traceDetailHandler.GetWaterfallV4),
handler.OpenAPIDef{

View File

@@ -59,19 +59,19 @@ func (c Config) Validate() error {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.waterfall.max_limit_to_select_all_spans must be positive")
}
if c.Flamegraph.MaxSelectedLevels <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.level_limit must be positive, got %d", c.Flamegraph.MaxSelectedLevels)
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.flamegraph.max_selected_levels must be positive, got %d", c.Flamegraph.MaxSelectedLevels)
}
if c.Flamegraph.MaxSpansPerLevel <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.spans_per_level must be positive, got %d", c.Flamegraph.MaxSpansPerLevel)
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.flamegraph.max_spans_per_level must be positive, got %d", c.Flamegraph.MaxSpansPerLevel)
}
if c.Flamegraph.SamplingTopLatencySpansCount < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.top_latency_count cannot be negative, got %d", c.Flamegraph.SamplingTopLatencySpansCount)
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.flamegraph.sampling_top_latency_count cannot be negative, got %d", c.Flamegraph.SamplingTopLatencySpansCount)
}
if c.Flamegraph.SamplingBucketCount <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.bucket_count must be positive, got %d", c.Flamegraph.SamplingBucketCount)
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.flamegraph.sampling_bucket_count must be positive, got %d", c.Flamegraph.SamplingBucketCount)
}
if c.Flamegraph.SelectAllSpansLimit == 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.max_limit_to_select_all_spans must be positive")
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.flamegraph.select_all_spans_limit must be positive")
}
return nil
}

View File

@@ -18,27 +18,6 @@ func NewHandler(module tracedetail.Module) tracedetail.Handler {
return &handler{module: module}
}
func (h *handler) GetWaterfall(rw http.ResponseWriter, r *http.Request) {
req := new(spantypes.PostableWaterfall)
if err := binding.JSON.BindBody(r.Body, req); err != nil {
render.Error(rw, err)
return
}
if err := req.Validate(); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.GetWaterfall(r.Context(), mux.Vars(r)["traceID"], req)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}
func (h *handler) GetWaterfallV4(rw http.ResponseWriter, r *http.Request) {
req := new(spantypes.PostableWaterfall)
if err := binding.JSON.BindBody(r.Body, req); err != nil {
@@ -51,7 +30,7 @@ func (h *handler) GetWaterfallV4(rw http.ResponseWriter, r *http.Request) {
return
}
result, err := h.module.GetWaterfallV4(r.Context(), mux.Vars(r)["traceID"], req.SelectedSpanID, req.UncollapsedSpans, req.Limit)
result, err := h.module.GetWaterfallV4(r.Context(), mux.Vars(r)["traceID"], req.SelectedSpanID, req.UncollapsedSpans)
if err != nil {
render.Error(rw, err)
return

View File

@@ -34,66 +34,21 @@ func NewModule(traceStore spantypes.TraceStore, providerSettings factory.Provide
}
m.metrics.waterfallSpanLimit.Record(context.Background(), int64(cfg.Waterfall.MaxLimitToSelectAllSpans), metric.WithAttributes(attrResponseType.String(attrResponseTypeWindowed)))
m.metrics.flamegraphSpanLimit.Record(context.Background(), int64(cfg.Flamegraph.SelectAllSpansLimit), metric.WithAttributes(attrResponseType.String(attrResponseTypeSampled)))
return m
}
func (m *module) GetWaterfall(ctx context.Context, traceID string, req *spantypes.PostableWaterfall) (*spantypes.GettableWaterfallTrace, error) {
waterfallTrace, err := m.getTraceData(ctx, traceID)
if err != nil {
return nil, err
}
selectedSpans, uncollapsedSpans, selectedAllSpans := waterfallTrace.GetWaterfallSpans(
req.UncollapsedSpans,
req.SelectedSpanID,
min(req.Limit, m.config.Waterfall.MaxLimitToSelectAllSpans),
m.config.Waterfall.SpanPageSize,
m.config.Waterfall.MaxDepthToAutoExpand,
)
aggregationResults := make([]spantypes.SpanAggregationResult, 0, len(req.Aggregations))
for _, a := range req.Aggregations {
aggregationResults = append(aggregationResults, waterfallTrace.GetSpanAggregation(a.Aggregation, a.Field))
}
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, uncollapsedSpans, selectedAllSpans, aggregationResults), nil
}
// getTraceData fetches all spans for a trace and builds the WaterfallTrace.
func (m *module) getTraceData(ctx context.Context, traceID string) (*spantypes.WaterfallTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
spanItems, err := m.store.GetTraceSpans(ctx, traceID, summary)
if err != nil {
return nil, err
}
if len(spanItems) == 0 {
return nil, spantypes.ErrTraceNotFound
}
nodes := make([]*spantypes.WaterfallSpan, len(spanItems))
for i := range spanItems {
nodes[i] = spanItems[i].ToWaterfallSpan(traceID)
}
return spantypes.NewWaterfallTraceFromSpans(nodes), nil
}
// GetWaterfallV4 is the OOM-safe V4 waterfall.
// For large traces (NumSpans > effectiveLimit) it uses a two-step fetch:
// minimal fields for all spans to build the tree, then full fields for the
// visible window only. Aggregations are not returned.
func (m *module) GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string, selectAllLimit uint) (*spantypes.GettableWaterfallTrace, error) {
func (m *module) GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string) (*spantypes.GettableWaterfallTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
effectiveLimit := min(selectAllLimit, m.config.Waterfall.MaxLimitToSelectAllSpans)
if summary.NumSpans > uint64(effectiveLimit) {
if summary.NumSpans > uint64(m.config.Waterfall.MaxLimitToSelectAllSpans) {
attrs := metric.WithAttributes(attrResponseType.String(attrResponseTypeWindowed))
m.metrics.waterfallRequestCount.Add(ctx, 1, attrs)
m.metrics.waterfallSpanCount.Add(ctx, int64(summary.NumSpans), attrs)
@@ -119,7 +74,7 @@ func (m *module) getFullWaterfall(ctx context.Context, traceID string, summary *
waterfallTrace := spantypes.NewWaterfallTraceFromSpans(nodes)
selectedSpans := waterfallTrace.GetAllSpans()
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, nil, true, nil), nil
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, nil, true), nil
}
func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error) {
@@ -173,6 +128,7 @@ func (m *module) GetFlamegraph(ctx context.Context, traceID string, selectedSpan
if summary.NumSpans <= uint64(m.config.Flamegraph.SelectAllSpansLimit) {
return m.getFullFlamegraph(ctx, traceID, summary, selectFields)
}
m.metrics.flamegraphRequestCount.Add(ctx, 1, metric.WithAttributes(attrResponseType.String(attrResponseTypeSampled)))
return m.getWindowedFlamegraph(ctx, traceID, selectedSpanID, summary, selectFields)
}
@@ -213,7 +169,7 @@ func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpan
spantypes.EnrichSelectedSpans(selectedSpans, fullSpans)
return spantypes.NewGettableWaterfallTrace(
waterfallTrace, selectedSpans, uncollapsedSpans, false, nil,
waterfallTrace, selectedSpans, uncollapsedSpans, false,
), nil
}

View File

@@ -9,12 +9,16 @@ import (
const (
attrResponseType = attribute.Key("response_type")
attrResponseTypeWindowed = "windowed"
attrResponseTypeSampled = "sampled"
)
type moduleMetrics struct {
waterfallSpanLimit metric.Int64Gauge
waterfallRequestCount metric.Int64Counter
waterfallSpanCount metric.Int64Counter
flamegraphSpanLimit metric.Int64Gauge
flamegraphRequestCount metric.Int64Counter
}
func newModuleMetrics(meter metric.Meter) (*moduleMetrics, error) {
@@ -47,9 +51,30 @@ func newModuleMetrics(meter metric.Meter) (*moduleMetrics, error) {
errs = errors.Join(errs, err)
}
flamegraphSpanLimit, err := meter.Int64Gauge(
"signoz.traces.flamegraph.span.limit",
metric.WithDescription("The span count limit above which sampled flamegraph is returned instead of the full flamegraph."),
metric.WithUnit("{span}"),
)
if err != nil {
errs = errors.Join(errs, err)
}
flamegraphRequestCount, err := meter.Int64Counter(
"signoz.traces.flamegraph.request.count",
metric.WithDescription("Total number of flamegraph requests, by response_type."),
metric.WithUnit("{request}"),
)
if err != nil {
errs = errors.Join(errs, err)
}
return &moduleMetrics{
waterfallSpanLimit: spanLimit,
waterfallRequestCount: requestCount,
waterfallSpanCount: spanCount,
flamegraphSpanLimit: flamegraphSpanLimit,
flamegraphRequestCount: flamegraphRequestCount,
}, errs
}

View File

@@ -260,7 +260,7 @@ func TestGetSelectedSpans_MultipleRoots(t *testing.T) {
trace := getWaterfallTrace([]*spantypes.WaterfallSpan{root1, root2}, spanMap)
spans, _ := trace.GetSelectedSpans([]string{"root1", "root2"}, "root1", 500, 5)
traceRespnose := spantypes.NewGettableWaterfallTrace(trace, spans, nil, false, nil)
traceRespnose := spantypes.NewGettableWaterfallTrace(trace, spans, nil, false)
assert.Equal(t, []string{"root1", "child1", "root2", "child2"}, spanIDs(spans), "root1 subtree must precede root2 subtree")
assert.Equal(t, "svc-a", traceRespnose.RootServiceName, "metadata comes from first root")
@@ -567,7 +567,7 @@ func TestGetAllSpans(t *testing.T) {
)
trace := getWaterfallTrace([]*spantypes.WaterfallSpan{root}, nil)
spans := trace.GetAllSpans()
traceResponse := spantypes.NewGettableWaterfallTrace(trace, spans, nil, true, nil)
traceResponse := spantypes.NewGettableWaterfallTrace(trace, spans, nil, true)
assert.ElementsMatch(t, spanIDs(spans), []string{"root", "childA", "grandchildA", "leafA", "childB", "grandchildB", "leafB"})
assert.Equal(t, "svc", traceResponse.RootServiceName)
assert.Equal(t, "root-op", traceResponse.RootServiceEntryPoint)

View File

@@ -10,7 +10,6 @@ import (
// Handler exposes HTTP handlers for trace detail APIs.
type Handler interface {
GetWaterfall(http.ResponseWriter, *http.Request)
GetWaterfallV4(http.ResponseWriter, *http.Request)
GetTraceAggregations(http.ResponseWriter, *http.Request)
GetFlamegraph(http.ResponseWriter, *http.Request)
@@ -18,8 +17,7 @@ type Handler interface {
// Module defines the business logic for trace detail operations.
type Module interface {
GetWaterfall(ctx context.Context, traceID string, req *spantypes.PostableWaterfall) (*spantypes.GettableWaterfallTrace, error)
GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string, selectAllLimit uint) (*spantypes.GettableWaterfallTrace, error)
GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string) (*spantypes.GettableWaterfallTrace, error)
GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error)
GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error)
}

View File

@@ -18,7 +18,6 @@ import (
"github.com/uptrace/bun"
"github.com/SigNoz/signoz/pkg/prometheus"
"github.com/SigNoz/signoz/pkg/query-service/utils/timestamp"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/types"
@@ -47,7 +46,6 @@ import (
"github.com/SigNoz/signoz/pkg/query-service/app/resource"
"github.com/SigNoz/signoz/pkg/query-service/app/services"
"github.com/SigNoz/signoz/pkg/query-service/app/traces/smart"
"github.com/SigNoz/signoz/pkg/query-service/app/traces/tracedetail"
"github.com/SigNoz/signoz/pkg/query-service/common"
"github.com/SigNoz/signoz/pkg/query-service/constants"
@@ -898,390 +896,6 @@ func (r *ClickHouseReader) GetSpansForTrace(ctx context.Context, traceID string,
return searchScanResponses, nil
}
func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadataCache(ctx context.Context, orgID valuer.UUID, traceID string) (*model.GetWaterfallSpansForTraceWithMetadataCache, error) {
cachedTraceData := new(model.GetWaterfallSpansForTraceWithMetadataCache)
err := r.cacheForTraceDetail.Get(ctx, orgID, strings.Join([]string{"getWaterfallSpansForTraceWithMetadata", traceID}, "-"), cachedTraceData)
if err != nil {
r.logger.Debug("error in retrieving getWaterfallSpansForTraceWithMetadata cache", errorsV2.Attr(err), "traceID", traceID)
return nil, err
}
if time.Since(time.UnixMilli(int64(cachedTraceData.EndTime))) < r.fluxIntervalForTraceDetail {
r.logger.Info("the trace end time falls under the flux interval, skipping getWaterfallSpansForTraceWithMetadata cache", "traceID", traceID)
return nil, errors.Errorf("the trace end time falls under the flux interval, skipping getWaterfallSpansForTraceWithMetadata cache, traceID: %s", traceID)
}
r.logger.Info("cache is successfully hit, applying cache for getWaterfallSpansForTraceWithMetadata", "traceID", traceID)
return cachedTraceData, nil
}
func (r *ClickHouseReader) GetWaterfallSpansForTraceWithMetadata(ctx context.Context, orgID valuer.UUID, traceID string, req *model.GetWaterfallSpansForTraceWithMetadataParams) (*model.GetWaterfallSpansForTraceWithMetadataResponse, error) {
response := new(model.GetWaterfallSpansForTraceWithMetadataResponse)
var startTime, endTime, durationNano, totalErrorSpans, totalSpans uint64
var spanIdToSpanNodeMap = map[string]*model.Span{}
var traceRoots []*model.Span
var serviceNameToTotalDurationMap = map[string]uint64{}
var serviceNameIntervalMap = map[string][]tracedetail.Interval{}
var hasMissingSpans bool
cachedTraceData, err := r.GetWaterfallSpansForTraceWithMetadataCache(ctx, orgID, traceID)
if err == nil {
startTime = cachedTraceData.StartTime
endTime = cachedTraceData.EndTime
durationNano = cachedTraceData.DurationNano
spanIdToSpanNodeMap = cachedTraceData.SpanIdToSpanNodeMap
serviceNameToTotalDurationMap = cachedTraceData.ServiceNameToTotalDurationMap
traceRoots = cachedTraceData.TraceRoots
totalSpans = cachedTraceData.TotalSpans
totalErrorSpans = cachedTraceData.TotalErrorSpans
hasMissingSpans = cachedTraceData.HasMissingSpans
}
if err != nil {
r.logger.Info("cache miss for getWaterfallSpansForTraceWithMetadata", "traceID", traceID)
searchScanResponses, err := r.GetSpansForTrace(ctx, traceID, fmt.Sprintf("SELECT DISTINCT ON (span_id) timestamp, duration_nano, span_id, trace_id, has_error, kind, resource_string_service$$name, name, links as references, attributes_string, attributes_number, attributes_bool, resources_string, events, status_message, status_code_string, kind_string FROM %s.%s WHERE trace_id=$1 and ts_bucket_start>=$2 and ts_bucket_start<=$3 ORDER BY timestamp ASC, name ASC", r.TraceDB, r.traceTableName))
if err != nil {
return nil, err
}
if len(searchScanResponses) == 0 {
return response, nil
}
totalSpans = uint64(len(searchScanResponses))
for _, item := range searchScanResponses {
ref := []model.OtelSpanRef{}
err := json.Unmarshal([]byte(item.References), &ref)
if err != nil {
r.logger.Error("getWaterfallSpansForTraceWithMetadata: error unmarshalling references", errorsV2.Attr(err), "traceID", traceID)
return nil, errorsV2.Newf(errorsV2.TypeInvalidInput, errorsV2.CodeInvalidInput, "getWaterfallSpansForTraceWithMetadata: error unmarshalling references %s", err.Error())
}
// merge attributes_number and attributes_bool to attributes_string
for k, v := range item.Attributes_bool {
item.Attributes_string[k] = fmt.Sprintf("%v", v)
}
for k, v := range item.Attributes_number {
item.Attributes_string[k] = strconv.FormatFloat(v, 'f', -1, 64)
}
for k, v := range item.Resources_string {
item.Attributes_string[k] = v
}
events := make([]model.Event, 0)
for _, event := range item.Events {
var eventMap model.Event
err = json.Unmarshal([]byte(event), &eventMap)
if err != nil {
r.logger.Error("Error unmarshalling events", errorsV2.Attr(err))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getWaterfallSpansForTraceWithMetadata: error in unmarshalling events %s", err.Error())
}
events = append(events, eventMap)
}
startTimeUnixNano := uint64(item.TimeUnixNano.UnixNano())
jsonItem := model.Span{
SpanID: item.SpanID,
TraceID: item.TraceID,
ServiceName: item.ServiceName,
Name: item.Name,
Kind: int32(item.Kind),
DurationNano: item.DurationNano,
HasError: item.HasError,
StatusMessage: item.StatusMessage,
StatusCodeString: item.StatusCodeString,
SpanKind: item.SpanKind,
References: ref,
Events: events,
TagMap: item.Attributes_string,
Children: make([]*model.Span, 0),
TimeUnixNano: startTimeUnixNano, // Store nanoseconds temporarily
}
// metadata calculation
if startTime == 0 || startTimeUnixNano < startTime {
startTime = startTimeUnixNano
}
if endTime == 0 || (startTimeUnixNano+jsonItem.DurationNano) > endTime {
endTime = (startTimeUnixNano + jsonItem.DurationNano)
}
if durationNano == 0 || jsonItem.DurationNano > durationNano {
durationNano = jsonItem.DurationNano
}
if jsonItem.HasError {
totalErrorSpans = totalErrorSpans + 1
}
// collect the intervals for service for execution time calculation
serviceNameIntervalMap[jsonItem.ServiceName] =
append(serviceNameIntervalMap[jsonItem.ServiceName], tracedetail.Interval{StartTime: jsonItem.TimeUnixNano, Duration: jsonItem.DurationNano, Service: jsonItem.ServiceName})
// append to the span node map
spanIdToSpanNodeMap[jsonItem.SpanID] = &jsonItem
}
// traverse through the map and append each node to the children array of the parent node
// and add the missing spans
for _, spanNode := range spanIdToSpanNodeMap {
hasParentSpanNode := false
for _, reference := range spanNode.References {
if reference.RefType == "CHILD_OF" && reference.SpanId != "" {
hasParentSpanNode = true
if parentNode, exists := spanIdToSpanNodeMap[reference.SpanId]; exists {
parentNode.Children = append(parentNode.Children, spanNode)
} else {
// insert the missing span
missingSpan := model.Span{
SpanID: reference.SpanId,
TraceID: spanNode.TraceID,
ServiceName: "",
Name: "Missing Span",
TimeUnixNano: spanNode.TimeUnixNano,
Kind: 0,
DurationNano: spanNode.DurationNano,
HasError: false,
StatusMessage: "",
StatusCodeString: "",
SpanKind: "",
Events: make([]model.Event, 0),
Children: make([]*model.Span, 0),
}
missingSpan.Children = append(missingSpan.Children, spanNode)
spanIdToSpanNodeMap[missingSpan.SpanID] = &missingSpan
traceRoots = append(traceRoots, &missingSpan)
hasMissingSpans = true
}
}
}
if !hasParentSpanNode && !tracedetail.ContainsWaterfallSpan(traceRoots, spanNode) {
traceRoots = append(traceRoots, spanNode)
}
}
// sort the trace roots to add missing spans at the right order
sort.Slice(traceRoots, func(i, j int) bool {
if traceRoots[i].TimeUnixNano == traceRoots[j].TimeUnixNano {
return traceRoots[i].Name < traceRoots[j].Name
}
return traceRoots[i].TimeUnixNano < traceRoots[j].TimeUnixNano
})
serviceNameToTotalDurationMap = tracedetail.CalculateServiceTime(serviceNameIntervalMap)
// TODO: set the span data (model.GetWaterfallSpansForTraceWithMetadataCache) in cache here
// removed existing cache usage since it was not getting used due to this bug https://github.com/SigNoz/engineering-pod/issues/4648
// and was causing out of memory issues https://github.com/SigNoz/engineering-pod/issues/4638
}
processingPostCache := time.Now()
// When req.Limit is 0 (not set by the client), selectAllSpans is set to false
// preserving the old paged behaviour for backward compatibility
limit := min(req.Limit, tracedetail.MaxLimitToSelectAllSpans)
selectAllSpans := totalSpans <= uint64(limit)
var (
selectedSpans []*model.Span
uncollapsedSpans []string
rootServiceName, rootServiceEntryPoint string
)
if selectAllSpans {
selectedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetAllSpans(traceRoots)
} else {
selectedSpans, uncollapsedSpans, rootServiceName, rootServiceEntryPoint = tracedetail.GetSelectedSpans(req.UncollapsedSpans, req.SelectedSpanID, traceRoots, spanIdToSpanNodeMap, req.IsSelectedSpanIDUnCollapsed)
}
r.logger.Info("getWaterfallSpansForTraceWithMetadata: processing post cache", "duration", time.Since(processingPostCache), "traceID", traceID)
// convert start timestamp to millis because right now frontend is expecting it in millis
for _, span := range selectedSpans {
span.TimeUnixNano = span.TimeUnixNano / 1000000
}
for serviceName, totalDuration := range serviceNameToTotalDurationMap {
serviceNameToTotalDurationMap[serviceName] = totalDuration / 1000000
}
response.Spans = selectedSpans
response.UncollapsedSpans = uncollapsedSpans // ignoring if all spans are returning
response.StartTimestampMillis = startTime / 1000000
response.EndTimestampMillis = endTime / 1000000
response.TotalSpansCount = totalSpans
response.TotalErrorSpansCount = totalErrorSpans
response.RootServiceName = rootServiceName
response.RootServiceEntryPoint = rootServiceEntryPoint
response.ServiceNameToTotalDurationMap = serviceNameToTotalDurationMap
response.HasMissingSpans = hasMissingSpans
response.HasMore = !selectAllSpans
return response, nil
}
func (r *ClickHouseReader) GetFlamegraphSpansForTraceCache(ctx context.Context, orgID valuer.UUID, traceID string) (*model.GetFlamegraphSpansForTraceCache, error) {
cachedTraceData := new(model.GetFlamegraphSpansForTraceCache)
err := r.cacheForTraceDetail.Get(ctx, orgID, strings.Join([]string{"getFlamegraphSpansForTrace", traceID}, "-"), cachedTraceData)
if err != nil {
r.logger.Debug("error in retrieving getFlamegraphSpansForTrace cache", errorsV2.Attr(err), "traceID", traceID)
return nil, err
}
if time.Since(time.UnixMilli(int64(cachedTraceData.EndTime))) < r.fluxIntervalForTraceDetail {
r.logger.Info("the trace end time falls under the flux interval, skipping getFlamegraphSpansForTrace cache", "traceID", traceID)
return nil, errors.Errorf("the trace end time falls under the flux interval, skipping getFlamegraphSpansForTrace cache, traceID: %s", traceID)
}
r.logger.Info("cache is successfully hit, applying cache for getFlamegraphSpansForTrace", "traceID", traceID)
return cachedTraceData, nil
}
func (r *ClickHouseReader) GetFlamegraphSpansForTrace(ctx context.Context, orgID valuer.UUID, traceID string, req *model.GetFlamegraphSpansForTraceParams) (*model.GetFlamegraphSpansForTraceResponse, error) {
trace := new(model.GetFlamegraphSpansForTraceResponse)
var startTime, endTime, durationNano uint64
var spanIdToSpanNodeMap = map[string]*model.FlamegraphSpan{}
// map[traceID][level]span
var selectedSpans = [][]*model.FlamegraphSpan{}
var traceRoots []*model.FlamegraphSpan
// get the trace tree from cache!
cachedTraceData, err := r.GetFlamegraphSpansForTraceCache(ctx, orgID, traceID)
if err == nil {
startTime = cachedTraceData.StartTime
endTime = cachedTraceData.EndTime
durationNano = cachedTraceData.DurationNano
selectedSpans = cachedTraceData.SelectedSpans
traceRoots = cachedTraceData.TraceRoots
}
if err != nil {
r.logger.Info("cache miss for getFlamegraphSpansForTrace", "traceID", traceID)
selectCols := "timestamp, duration_nano, span_id, trace_id, has_error, links as references, resource_string_service$$name, name, events"
if len(req.SelectFields) > 0 {
selectCols += ", attributes_string, attributes_number, attributes_bool, resources_string"
}
flamegraphQuery := fmt.Sprintf("SELECT %s FROM %s.%s WHERE trace_id=$1 and ts_bucket_start>=$2 and ts_bucket_start<=$3 ORDER BY timestamp ASC, name ASC", selectCols, r.TraceDB, r.traceTableName)
searchScanResponses, err := r.GetSpansForTrace(ctx, traceID, flamegraphQuery)
if err != nil {
return nil, err
}
if len(searchScanResponses) == 0 {
return trace, nil
}
for _, item := range searchScanResponses {
ref := []model.OtelSpanRef{}
err := json.Unmarshal([]byte(item.References), &ref)
if err != nil {
r.logger.Error("Error unmarshalling references", errorsV2.Attr(err))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getFlamegraphSpansForTrace: error in unmarshalling references %s", err.Error())
}
events := make([]model.Event, 0)
for _, event := range item.Events {
var eventMap model.Event
err = json.Unmarshal([]byte(event), &eventMap)
if err != nil {
r.logger.Error("Error unmarshalling events", errorsV2.Attr(err))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getFlamegraphSpansForTrace: error in unmarshalling events %s", err.Error())
}
events = append(events, eventMap)
}
jsonItem := model.FlamegraphSpan{
SpanID: item.SpanID,
TraceID: item.TraceID,
ServiceName: item.ServiceName,
Name: item.Name,
DurationNano: item.DurationNano,
HasError: item.HasError,
References: ref,
Events: events,
Children: make([]*model.FlamegraphSpan, 0),
}
if len(req.SelectFields) > 0 {
jsonItem.SetRequestedFields(item, req.SelectFields)
}
// metadata calculation
startTimeUnixNano := uint64(item.TimeUnixNano.UnixNano())
if startTime == 0 || startTimeUnixNano < startTime {
startTime = startTimeUnixNano
}
if endTime == 0 || (startTimeUnixNano+jsonItem.DurationNano) > endTime {
endTime = (startTimeUnixNano + jsonItem.DurationNano)
}
if durationNano == 0 || jsonItem.DurationNano > durationNano {
durationNano = jsonItem.DurationNano
}
jsonItem.TimeUnixNano = uint64(item.TimeUnixNano.UnixNano() / 1000000)
spanIdToSpanNodeMap[jsonItem.SpanID] = &jsonItem
}
// traverse through the map and append each node to the children array of the parent node
// and add missing spans
for _, spanNode := range spanIdToSpanNodeMap {
hasParentSpanNode := false
for _, reference := range spanNode.References {
if reference.RefType == "CHILD_OF" && reference.SpanId != "" {
hasParentSpanNode = true
if parentNode, exists := spanIdToSpanNodeMap[reference.SpanId]; exists {
parentNode.Children = append(parentNode.Children, spanNode)
} else {
// insert the missing spans
missingSpan := model.FlamegraphSpan{
SpanID: reference.SpanId,
TraceID: spanNode.TraceID,
ServiceName: "",
Name: "Missing Span",
TimeUnixNano: spanNode.TimeUnixNano,
DurationNano: spanNode.DurationNano,
HasError: false,
Events: make([]model.Event, 0),
Children: make([]*model.FlamegraphSpan, 0),
}
missingSpan.Children = append(missingSpan.Children, spanNode)
spanIdToSpanNodeMap[missingSpan.SpanID] = &missingSpan
traceRoots = append(traceRoots, &missingSpan)
}
}
}
if !hasParentSpanNode && !tracedetail.ContainsFlamegraphSpan(traceRoots, spanNode) {
traceRoots = append(traceRoots, spanNode)
}
}
selectedSpans = tracedetail.GetAllSpansForFlamegraph(traceRoots, spanIdToSpanNodeMap)
// TODO: set the trace data (model.GetFlamegraphSpansForTraceCache) in cache here
// removed existing cache usage since it was not getting used due to this bug https://github.com/SigNoz/engineering-pod/issues/4648
// and was causing out of memory issues https://github.com/SigNoz/engineering-pod/issues/4638
}
processingPostCache := time.Now()
selectedSpansForRequest := selectedSpans
clientLimit := min(req.Limit, tracedetail.MaxLimitWithoutSampling)
totalSpanCount := tracedetail.GetTotalSpanCount(selectedSpans)
if totalSpanCount > uint64(clientLimit) {
// using trace start and end time if boundary ts are set to zero (or not set)
boundaryStart := max(timestamp.MilliToNano(req.BoundaryStartTS), startTime)
boundaryEnd := timestamp.MilliToNano(req.BoundaryEndTS)
if boundaryEnd == 0 {
boundaryEnd = endTime
}
selectedSpansForRequest = tracedetail.GetSelectedSpansForFlamegraphForRequest(req.SelectedSpanID, selectedSpans, boundaryStart, boundaryEnd)
}
r.logger.Debug("getFlamegraphSpansForTrace: processing post cache", "duration", time.Since(processingPostCache), "traceID", traceID, "totalSpans", totalSpanCount, "limit", clientLimit)
trace.Spans = selectedSpansForRequest
trace.StartTimestampMillis = startTime / 1000000
trace.EndTimestampMillis = endTime / 1000000
trace.HasMore = totalSpanCount > uint64(clientLimit)
return trace, nil
}
func (r *ClickHouseReader) GetDependencyGraph(ctx context.Context, queryParams *model.GetServicesParams) (*[]model.ServiceMapDependencyResponseItem, error) {

View File

@@ -534,8 +534,8 @@ func (aH *APIHandler) RegisterRoutes(router *mux.Router, am *middleware.AuthZ) {
router.HandleFunc("/api/v2/traces/fields", am.ViewAccess(aH.traceFields)).Methods(http.MethodGet)
router.HandleFunc("/api/v2/traces/fields", am.EditAccess(aH.updateTraceField)).Methods(http.MethodPost)
router.HandleFunc("/api/v2/traces/flamegraph/{traceId}", am.ViewAccess(aH.GetFlamegraphSpansForTrace)).Methods(http.MethodPost)
router.HandleFunc("/api/v2/traces/waterfall/{traceId}", am.ViewAccess(aH.GetWaterfallSpansForTraceWithMetadata)).Methods(http.MethodPost)
router.HandleFunc("/api/v1/version", am.OpenAccess(aH.getVersion)).Methods(http.MethodGet)
router.HandleFunc("/api/v1/features", am.ViewAccess(aH.getFeatureFlags)).Methods(http.MethodGet)
@@ -1446,73 +1446,6 @@ func (aH *APIHandler) SearchTraces(w http.ResponseWriter, r *http.Request) {
}
func (aH *APIHandler) GetWaterfallSpansForTraceWithMetadata(w http.ResponseWriter, r *http.Request) {
claims, err := authtypes.ClaimsFromContext(r.Context())
if err != nil {
render.Error(w, err)
return
}
orgID, err := valuer.NewUUID(claims.OrgID)
if err != nil {
render.Error(w, err)
return
}
traceID := mux.Vars(r)["traceId"]
if traceID == "" {
render.Error(w, errors.NewInvalidInputf(errors.CodeInvalidInput, "traceID is required"))
return
}
req := new(model.GetWaterfallSpansForTraceWithMetadataParams)
err = json.NewDecoder(r.Body).Decode(&req)
if err != nil {
RespondError(w, model.BadRequest(err), nil)
return
}
result, apiErr := aH.reader.GetWaterfallSpansForTraceWithMetadata(r.Context(), orgID, traceID, req)
if apiErr != nil {
render.Error(w, apiErr)
return
}
aH.WriteJSON(w, r, result)
}
func (aH *APIHandler) GetFlamegraphSpansForTrace(w http.ResponseWriter, r *http.Request) {
claims, err := authtypes.ClaimsFromContext(r.Context())
if err != nil {
render.Error(w, err)
return
}
orgID, err := valuer.NewUUID(claims.OrgID)
if err != nil {
render.Error(w, err)
return
}
traceID := mux.Vars(r)["traceId"]
if traceID == "" {
render.Error(w, errors.NewInvalidInputf(errors.CodeInvalidInput, "traceID is required"))
return
}
req := new(model.GetFlamegraphSpansForTraceParams)
err = json.NewDecoder(r.Body).Decode(&req)
if err != nil {
RespondError(w, model.BadRequest(err), nil)
return
}
result, apiErr := aH.reader.GetFlamegraphSpansForTrace(r.Context(), orgID, traceID, req)
if apiErr != nil {
render.Error(w, apiErr)
return
}
aH.WriteJSON(w, r, result)
}
func (aH *APIHandler) listErrors(w http.ResponseWriter, r *http.Request) {
query, err := parseListErrorsRequest(r)

View File

@@ -1,199 +0,0 @@
package tracedetail
import (
"sort"
"github.com/SigNoz/signoz/pkg/query-service/model"
)
var (
flamegraphSpanLevelLimit float64 = 50
flamegraphSpanLimitPerLevel int = 100
flamegraphSamplingBucketCount int = 50
flamegraphTopLatencySpanCount int = 5
MaxLimitWithoutSampling uint = 120_000
)
func ContainsFlamegraphSpan(slice []*model.FlamegraphSpan, item *model.FlamegraphSpan) bool {
for _, v := range slice {
if v.SpanID == item.SpanID {
return true
}
}
return false
}
func BfsTraversalForTrace(span *model.FlamegraphSpan, level int64) map[int64][]*model.FlamegraphSpan {
bfs := map[int64][]*model.FlamegraphSpan{}
bfs[level] = []*model.FlamegraphSpan{span}
for _, child := range span.Children {
childBfsMap := BfsTraversalForTrace(child, level+1)
for _level, nodes := range childBfsMap {
bfs[_level] = append(bfs[_level], nodes...)
}
}
span.Level = level
span.Children = make([]*model.FlamegraphSpan, 0)
return bfs
}
func FindIndexForSelectedSpan(spans [][]*model.FlamegraphSpan, selectedSpanId string) int {
var selectedSpanLevel int = 0
for index, _spans := range spans {
for _, span := range _spans {
if span.SpanID == selectedSpanId {
selectedSpanLevel = index
break
}
}
}
return selectedSpanLevel
}
// GetAllSpansForFlamegraph groups all spans as per their level
func GetAllSpansForFlamegraph(traceRoots []*model.FlamegraphSpan, spanIdToSpanNodeMap map[string]*model.FlamegraphSpan) [][]*model.FlamegraphSpan {
var traceIdLevelledFlamegraph = map[string]map[int64][]*model.FlamegraphSpan{}
selectedSpans := [][]*model.FlamegraphSpan{}
// sort the trace roots to add missing spans at the right order
sort.Slice(traceRoots, func(i, j int) bool {
if traceRoots[i].TimeUnixNano == traceRoots[j].TimeUnixNano {
return traceRoots[i].Name < traceRoots[j].Name
}
return traceRoots[i].TimeUnixNano < traceRoots[j].TimeUnixNano
})
for _, rootSpanID := range traceRoots {
if rootNode, exists := spanIdToSpanNodeMap[rootSpanID.SpanID]; exists {
bfsMapForTrace := BfsTraversalForTrace(rootNode, 0)
traceIdLevelledFlamegraph[rootSpanID.SpanID] = bfsMapForTrace
}
}
for _, trace := range traceRoots {
keys := make([]int64, 0, len(traceIdLevelledFlamegraph[trace.SpanID]))
for key := range traceIdLevelledFlamegraph[trace.SpanID] {
keys = append(keys, key)
}
sort.Slice(keys, func(i, j int) bool {
return keys[i] < keys[j]
})
for _, level := range keys {
if ok, exists := traceIdLevelledFlamegraph[trace.SpanID][level]; exists {
selectedSpans = append(selectedSpans, ok)
}
}
}
return selectedSpans
}
func getLatencyAndTimestampBucketedSpans(spans []*model.FlamegraphSpan, selectedSpanID string, isSelectedSpanIDPresent bool, startTime uint64, endTime uint64) []*model.FlamegraphSpan {
var sampledSpans []*model.FlamegraphSpan
// sort the spans by latency for latency filtering
sort.Slice(spans, func(i, j int) bool {
return spans[i].DurationNano > spans[j].DurationNano
})
// pick the top 5 latency spans
for idx := range flamegraphTopLatencySpanCount {
sampledSpans = append(sampledSpans, spans[idx])
}
// always add the selectedSpan
if isSelectedSpanIDPresent {
idx := -1
for _idx, span := range spans {
if span.SpanID == selectedSpanID {
idx = _idx
}
}
if idx != -1 {
sampledSpans = append(sampledSpans, spans[idx])
}
}
bucketSize := (endTime - startTime) / uint64(flamegraphSamplingBucketCount)
if bucketSize == 0 {
bucketSize = 1
}
bucketedSpans := make([][]*model.FlamegraphSpan, flamegraphSamplingBucketCount)
for _, span := range spans {
if span.TimeUnixNano >= startTime && span.TimeUnixNano <= endTime {
bucketIndex := int((span.TimeUnixNano - startTime) / bucketSize)
if bucketIndex >= 0 && bucketIndex < flamegraphSamplingBucketCount {
bucketedSpans[bucketIndex] = append(bucketedSpans[bucketIndex], span)
}
}
}
for i := range bucketedSpans {
if len(bucketedSpans[i]) > 2 {
// Keep only the first 2 spans
bucketedSpans[i] = bucketedSpans[i][:2]
}
}
// Flatten the bucketed spans into a single slice
for _, bucket := range bucketedSpans {
sampledSpans = append(sampledSpans, bucket...)
}
return sampledSpans
}
func GetSelectedSpansForFlamegraphForRequest(selectedSpanID string, selectedSpans [][]*model.FlamegraphSpan, startTime uint64, endTime uint64) [][]*model.FlamegraphSpan {
var selectedSpansForRequest = make([][]*model.FlamegraphSpan, 0)
var selectedIndex = 0
if selectedSpanID != "" {
selectedIndex = FindIndexForSelectedSpan(selectedSpans, selectedSpanID)
}
lowerLimit := selectedIndex - int(flamegraphSpanLevelLimit*0.4)
upperLimit := selectedIndex + int(flamegraphSpanLevelLimit*0.6)
if lowerLimit < 0 {
upperLimit = upperLimit - lowerLimit
lowerLimit = 0
}
if upperLimit > len(selectedSpans) {
lowerLimit = lowerLimit - (upperLimit - len(selectedSpans))
upperLimit = len(selectedSpans)
}
if lowerLimit < 0 {
lowerLimit = 0
}
for i := lowerLimit; i < upperLimit; i++ {
if len(selectedSpans[i]) > flamegraphSpanLimitPerLevel {
_spans := getLatencyAndTimestampBucketedSpans(selectedSpans[i], selectedSpanID, i == selectedIndex, startTime, endTime)
selectedSpansForRequest = append(selectedSpansForRequest, _spans)
} else {
selectedSpansForRequest = append(selectedSpansForRequest, selectedSpans[i])
}
}
return selectedSpansForRequest
}
func GetTotalSpanCount(spans [][]*model.FlamegraphSpan) uint64 {
levelCount := len(spans)
spanCount := uint64(0)
for i := range levelCount {
spanCount += uint64(len(spans[i]))
}
return spanCount
}

View File

@@ -1,287 +0,0 @@
package tracedetail
import (
"maps"
"slices"
"sort"
"github.com/SigNoz/signoz/pkg/query-service/model"
)
var (
SPAN_LIMIT_PER_REQUEST_FOR_WATERFALL float64 = 500
maxDepthForSelectedSpanChildren int = 5
MaxLimitToSelectAllSpans uint = 10_000
)
type Interval struct {
StartTime uint64
Duration uint64
Service string
}
func mergeIntervals(intervals []Interval) []Interval {
if len(intervals) == 0 {
return nil
}
var merged []Interval
current := intervals[0]
for i := 1; i < len(intervals); i++ {
next := intervals[i]
if current.StartTime+current.Duration >= next.StartTime {
endTime := max(current.StartTime+current.Duration, next.StartTime+next.Duration)
current.Duration = endTime - current.StartTime
} else {
merged = append(merged, current)
current = next
}
}
// Add the last interval
merged = append(merged, current)
return merged
}
func ContainsWaterfallSpan(slice []*model.Span, item *model.Span) bool {
for _, v := range slice {
if v.SpanID == item.SpanID {
return true
}
}
return false
}
func findIndexForSelectedSpanFromPreOrder(spans []*model.Span, selectedSpanId string) int {
var selectedSpanIndex = -1
for index, span := range spans {
if span.SpanID == selectedSpanId {
selectedSpanIndex = index
break
}
}
return selectedSpanIndex
}
func getPathFromRootToSelectedSpanId(node *model.Span, selectedSpanId string) (bool, []string) {
spansFromRootToNode := []string{}
spansFromRootToNode = append(spansFromRootToNode, node.SpanID)
if node.SpanID == selectedSpanId {
return true, spansFromRootToNode
}
isPresentInSubtreeForTheNode := false
for _, child := range node.Children {
isPresentInThisSubtree, _spansFromRootToNode := getPathFromRootToSelectedSpanId(child, selectedSpanId)
// if the interested node is present in the given subtree then add the span node to uncollapsed node list
if isPresentInThisSubtree {
isPresentInSubtreeForTheNode = true
spansFromRootToNode = append(spansFromRootToNode, _spansFromRootToNode...)
break
}
}
return isPresentInSubtreeForTheNode, spansFromRootToNode
}
// traverseOpts holds the traversal configuration that remains constant
// throughout the recursion. Per-call state (level, isPartOfPreOrder, etc.)
// is passed as direct arguments.
type traverseOpts struct {
uncollapsedSpans map[string]struct{}
selectedSpanID string
isSelectedSpanUncollapsed bool
selectAll bool
}
func traverseTrace(
span *model.Span,
opts traverseOpts,
level uint64,
isPartOfPreOrder bool,
hasSibling bool,
autoExpandDepth int,
) ([]*model.Span, []string) {
preOrderTraversal := []*model.Span{}
autoExpandedSpans := []string{}
// sort the children to maintain the order across requests
sort.Slice(span.Children, func(i, j int) bool {
if span.Children[i].TimeUnixNano == span.Children[j].TimeUnixNano {
return span.Children[i].Name < span.Children[j].Name
}
return span.Children[i].TimeUnixNano < span.Children[j].TimeUnixNano
})
span.SubTreeNodeCount = 0
nodeWithoutChildren := model.Span{
SpanID: span.SpanID,
TraceID: span.TraceID,
ServiceName: span.ServiceName,
TimeUnixNano: span.TimeUnixNano,
Name: span.Name,
Kind: int32(span.Kind),
DurationNano: span.DurationNano,
HasError: span.HasError,
StatusMessage: span.StatusMessage,
StatusCodeString: span.StatusCodeString,
SpanKind: span.SpanKind,
References: span.References,
Events: span.Events,
TagMap: span.TagMap,
Children: make([]*model.Span, 0),
HasChildren: len(span.Children) > 0,
Level: level,
HasSiblings: hasSibling,
SubTreeNodeCount: 0,
}
if isPartOfPreOrder {
preOrderTraversal = append(preOrderTraversal, &nodeWithoutChildren)
}
remainingAutoExpandDepth := 0
if span.SpanID == opts.selectedSpanID && opts.isSelectedSpanUncollapsed {
remainingAutoExpandDepth = maxDepthForSelectedSpanChildren
} else if autoExpandDepth > 0 {
remainingAutoExpandDepth = autoExpandDepth - 1
}
_, isAlreadyUncollapsed := opts.uncollapsedSpans[span.SpanID]
for index, child := range span.Children {
// A child is included in the pre-order output if its parent is uncollapsed
// OR if the child falls within MAX_DEPTH_FOR_SELECTED_SPAN_CHILDREN levels
// below the selected span.
isChildWithinMaxDepth := remainingAutoExpandDepth > 0
childIsPartOfPreOrder := opts.selectAll || (isPartOfPreOrder && (isAlreadyUncollapsed || isChildWithinMaxDepth))
if isPartOfPreOrder && isChildWithinMaxDepth && !isAlreadyUncollapsed {
if !slices.Contains(autoExpandedSpans, span.SpanID) {
autoExpandedSpans = append(autoExpandedSpans, span.SpanID)
}
}
_childTraversal, _autoExpanded := traverseTrace(child, opts, level+1, childIsPartOfPreOrder, index != (len(span.Children)-1), remainingAutoExpandDepth)
preOrderTraversal = append(preOrderTraversal, _childTraversal...)
autoExpandedSpans = append(autoExpandedSpans, _autoExpanded...)
nodeWithoutChildren.SubTreeNodeCount += child.SubTreeNodeCount + 1
span.SubTreeNodeCount += child.SubTreeNodeCount + 1
}
nodeWithoutChildren.SubTreeNodeCount += 1
return preOrderTraversal, autoExpandedSpans
}
func CalculateServiceTime(serviceIntervals map[string][]Interval) map[string]uint64 {
totalTimes := make(map[string]uint64)
for service, serviceIntervals := range serviceIntervals {
sort.Slice(serviceIntervals, func(i, j int) bool {
return serviceIntervals[i].StartTime < serviceIntervals[j].StartTime
})
mergedIntervals := mergeIntervals(serviceIntervals)
totalTime := uint64(0)
for _, interval := range mergedIntervals {
totalTime += interval.Duration
}
totalTimes[service] = totalTime
}
return totalTimes
}
func GetSelectedSpans(uncollapsedSpans []string, selectedSpanID string, traceRoots []*model.Span, spanIdToSpanNodeMap map[string]*model.Span, isSelectedSpanIDUnCollapsed bool) ([]*model.Span, []string, string, string) {
var preOrderTraversal = make([]*model.Span, 0)
var rootServiceName, rootServiceEntryPoint string
// create a map of uncollapsed spans for quick lookup
uncollapsedSpanMap := make(map[string]struct{})
for _, spanID := range uncollapsedSpans {
uncollapsedSpanMap[spanID] = struct{}{}
}
selectedSpanIndex := -1
for _, rootSpanID := range traceRoots {
if rootNode, exists := spanIdToSpanNodeMap[rootSpanID.SpanID]; exists {
present, spansFromRootToNode := getPathFromRootToSelectedSpanId(rootNode, selectedSpanID)
if present {
for _, spanID := range spansFromRootToNode {
if selectedSpanID == spanID && !isSelectedSpanIDUnCollapsed {
continue
}
uncollapsedSpanMap[spanID] = struct{}{}
}
}
opts := traverseOpts{
uncollapsedSpans: uncollapsedSpanMap,
selectedSpanID: selectedSpanID,
isSelectedSpanUncollapsed: isSelectedSpanIDUnCollapsed,
}
_preOrderTraversal, _autoExpanded := traverseTrace(rootNode, opts, 0, true, false, 0)
// Merge auto-expanded spans into updatedUncollapsedSpans for returning in response
for _, spanID := range _autoExpanded {
uncollapsedSpanMap[spanID] = struct{}{}
}
_selectedSpanIndex := findIndexForSelectedSpanFromPreOrder(_preOrderTraversal, selectedSpanID)
if _selectedSpanIndex != -1 {
selectedSpanIndex = _selectedSpanIndex + len(preOrderTraversal)
}
preOrderTraversal = append(preOrderTraversal, _preOrderTraversal...)
if rootServiceName == "" {
rootServiceName = rootNode.ServiceName
}
if rootServiceEntryPoint == "" {
rootServiceEntryPoint = rootNode.Name
}
}
}
// if we couldn't find the selectedSpan in the trace then defaulting the selected index to 0
if selectedSpanIndex == -1 && selectedSpanID != "" {
selectedSpanIndex = 0
}
// get the 0.4*[span limit] before the interested span index
startIndex := selectedSpanIndex - int(SPAN_LIMIT_PER_REQUEST_FOR_WATERFALL*0.4)
// get the 0.6*[span limit] after the intrested span index
endIndex := selectedSpanIndex + int(SPAN_LIMIT_PER_REQUEST_FOR_WATERFALL*0.6)
// adjust the sliding window according to the available left and right spaces.
if startIndex < 0 {
endIndex = endIndex - startIndex
startIndex = 0
}
if endIndex > len(preOrderTraversal) {
startIndex = startIndex - (endIndex - len(preOrderTraversal))
endIndex = len(preOrderTraversal)
}
if startIndex < 0 {
startIndex = 0
}
return preOrderTraversal[startIndex:endIndex], slices.Collect(maps.Keys(uncollapsedSpanMap)), rootServiceName, rootServiceEntryPoint
}
func GetAllSpans(traceRoots []*model.Span) (spans []*model.Span, rootServiceName, rootEntryPoint string) {
if len(traceRoots) > 0 {
rootServiceName = traceRoots[0].ServiceName
rootEntryPoint = traceRoots[0].Name
}
for _, root := range traceRoots {
childSpans, _ := traverseTrace(root, traverseOpts{selectAll: true}, 0, true, false, 0)
spans = append(spans, childSpans...)
}
return
}

View File

@@ -1,446 +0,0 @@
// Package tracedetail tests — waterfall
//
// # Background
//
// The waterfall view renders a trace as a scrollable list of spans in
// pre-order (parent before children, siblings left-to-right). Because a trace
// can have thousands of spans, only a window of ~500 is returned per request.
// The window is centred on the selected span.
//
// # Key concepts
//
// uncollapsedSpans
//
// The set of span IDs the user has manually expanded in the UI.
// Only the direct children of an uncollapsed span are included in the
// output; grandchildren stay hidden until their parent is also uncollapsed.
// When multiple spans are uncollapsed their children are all visible at once.
//
// selectedSpanID
//
// The span currently focused — set when the user clicks a span in the
// waterfall or selects one from the flamegraph. The output window is always
// centred on this span. The path from the trace root down to the selected
// span is automatically uncollapsed so ancestors are visible even if they are
// not in uncollapsedSpans.
//
// isSelectedSpanIDUnCollapsed
//
// Controls whether the selected span's own children are shown:
// true — user expanded the span (click-to-open in waterfall or flamegraph);
// direct children of the selected span are included.
// false — user selected without expanding;
// the span is visible but its children remain hidden.
//
// traceRoots
//
// Root spans of the trace — spans with no parent in the current dataset.
// Normally one, but multiple roots are common when upstream services are
// not instrumented or their spans were not sampled/exported.
package tracedetail
import (
"fmt"
"testing"
"github.com/SigNoz/signoz/pkg/query-service/model"
"github.com/stretchr/testify/assert"
)
// Pre-order traversal is preserved: parent before children, siblings left-to-right.
func TestGetSelectedSpans_PreOrderTraversal(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("child1", "svc", mkSpan("grandchild", "svc")),
mkSpan("child2", "svc"),
)
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{"root", "child1"}, "root", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root", "child1", "grandchild", "child2"}, spanIDs(spans))
}
// Multiple roots: both trees are flattened into a single pre-order list with
// root1's subtree before root2's. Service/entry-point come from the first root.
//
// root1 svc-a ← selected
// └─ child1
// root2 svc-b
// └─ child2
//
// Expected output order: root1 → child1 → root2 → child2
func TestGetSelectedSpans_MultipleRoots(t *testing.T) {
root1 := mkSpan("root1", "svc-a", mkSpan("child1", "svc-a"))
root2 := mkSpan("root2", "svc-b", mkSpan("child2", "svc-b"))
spanMap := buildSpanMap(root1, root2)
spans, _, svcName, entryPoint := GetSelectedSpans([]string{"root1", "root2"}, "root1", []*model.Span{root1, root2}, spanMap, false)
assert.Equal(t, []string{"root1", "child1", "root2", "child2"}, spanIDs(spans), "root1 subtree must precede root2 subtree")
assert.Equal(t, "svc-a", svcName, "metadata comes from first root")
assert.Equal(t, "root1-op", entryPoint, "metadata comes from first root")
}
// Multiple spans uncollapsed simultaneously: children of all uncollapsed spans
// are visible at once.
//
// root
// ├─ childA (uncollapsed) → grandchildA ✓
// └─ childB (uncollapsed) → grandchildB ✓
func TestGetSelectedSpans_MultipleUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc", mkSpan("grandchildA", "svc")),
mkSpan("childB", "svc", mkSpan("grandchildB", "svc")),
)
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{"root", "childA", "childB"}, "root", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root", "childA", "grandchildA", "childB", "grandchildB"}, spanIDs(spans))
}
// Collapsing a span with other uncollapsed spans
//
// root
// ├─ childA (previously expanded — in uncollapsedSpans)
// │ ├─ grandchild1 ✓
// │ │ └─ greatGrandchild ✗ (grandchild1 not in uncollapsedSpans)
// │ └─ grandchild2 ✓
// └─ childB ← selected (not expanded)
func TestGetSelectedSpans_ManualUncollapse(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc",
mkSpan("grandchild1", "svc", mkSpan("greatGrandchild", "svc")),
mkSpan("grandchild2", "svc"),
),
mkSpan("childB", "svc"),
)
spanMap := buildSpanMap(root)
// childA was expanded in a previous interaction; childB is now selected without expanding
spans, _, _, _ := GetSelectedSpans([]string{"childA"}, "childB", []*model.Span{root}, spanMap, false)
// path to childB auto-uncollpases root → childA and childB appear; childA is in
// uncollapsedSpans so its children appear; greatGrandchild stays hidden.
assert.Equal(t, []string{"root", "childA", "grandchild1", "grandchild2", "childB"}, spanIDs(spans))
}
// A collapsed span hides all children.
func TestGetSelectedSpans_CollapsedSpan(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("child1", "svc"),
mkSpan("child2", "svc"),
)
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{}, "root", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root"}, spanIDs(spans))
}
// Selecting a span auto-uncollpases the path from root to that span so it is visible.
//
// root → parent → selected
func TestGetSelectedSpans_PathToSelectedIsUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("parent", "svc",
mkSpan("selected", "svc"),
),
)
spanMap := buildSpanMap(root)
// no manually uncollapsed spans — path should still be opened
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root", "parent", "selected"}, spanIDs(spans))
}
// The path-to-selected spans are returned in updatedUncollapsedSpans.
func TestGetSelectedSpans_PathReturnedInUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("parent", "svc",
mkSpan("selected", "svc"),
),
)
spanMap := buildSpanMap(root)
spans, uncollapsed, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, false)
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
assert.Equal(t, []string{"root", "parent", "selected"}, spanIDs(spans))
}
// Siblings of ancestors are rendered as collapsed nodes but their subtrees
// must NOT be expanded.
//
// root
// ├─ unrelated → unrelated-child (✗)
// └─ parent → selected
func TestGetSelectedSpans_SiblingsNotExpanded(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("unrelated", "svc", mkSpan("unrelated-child", "svc")),
mkSpan("parent", "svc",
mkSpan("selected", "svc"),
),
)
spanMap := buildSpanMap(root)
spans, uncollapsed, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, false)
// children of root sort alphabetically: parent < unrelated; unrelated-child stays hidden
assert.Equal(t, []string{"root", "parent", "selected", "unrelated"}, spanIDs(spans))
// only the path nodes are tracked as uncollapsed — unrelated is not
assert.ElementsMatch(t, []string{"root", "parent"}, uncollapsed)
}
// An unknown selectedSpanID must not panic; returns a window from index 0.
func TestGetSelectedSpans_UnknownSelectedSpan(t *testing.T) {
root := mkSpan("root", "svc", mkSpan("child", "svc"))
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{}, "nonexistent", []*model.Span{root}, spanMap, false)
assert.Equal(t, []string{"root"}, spanIDs(spans))
}
// Test to check if Level, HasChildren, HasSiblings, and SubTreeNodeCount are populated correctly.
//
// root level=0, hasChildren=true, hasSiblings=false, subTree=4
// child1 level=1, hasChildren=true, hasSiblings=true, subTree=2
// grandchild level=2, hasChildren=false, hasSiblings=false, subTree=1
// child2 level=1, hasChildren=false, hasSiblings=false, subTree=1
func TestGetSelectedSpans_SpanMetadata(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("child1", "svc", mkSpan("grandchild", "svc")),
mkSpan("child2", "svc"),
)
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{"root", "child1"}, "root", []*model.Span{root}, spanMap, false)
byID := map[string]*model.Span{}
for _, s := range spans {
byID[s.SpanID] = s
}
assert.Equal(t, uint64(0), byID["root"].Level)
assert.Equal(t, uint64(1), byID["child1"].Level)
assert.Equal(t, uint64(1), byID["child2"].Level)
assert.Equal(t, uint64(2), byID["grandchild"].Level)
assert.True(t, byID["root"].HasChildren)
assert.True(t, byID["child1"].HasChildren)
assert.False(t, byID["child2"].HasChildren)
assert.False(t, byID["grandchild"].HasChildren)
assert.False(t, byID["root"].HasSiblings, "root has no siblings")
assert.True(t, byID["child1"].HasSiblings, "child1 has sibling child2")
assert.False(t, byID["child2"].HasSiblings, "child2 is the last child")
assert.False(t, byID["grandchild"].HasSiblings, "grandchild has no siblings")
assert.Equal(t, uint64(4), byID["root"].SubTreeNodeCount)
assert.Equal(t, uint64(2), byID["child1"].SubTreeNodeCount)
assert.Equal(t, uint64(1), byID["grandchild"].SubTreeNodeCount)
assert.Equal(t, uint64(1), byID["child2"].SubTreeNodeCount)
}
// If the selected span is already in uncollapsedSpans AND isSelectedSpanIDUnCollapsed=true,
func TestGetSelectedSpans_DuplicateInUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("selected", "svc", mkSpan("child", "svc")),
)
spanMap := buildSpanMap(root)
_, uncollapsed, _, _ := GetSelectedSpans(
[]string{"selected"}, // already present
"selected",
[]*model.Span{root}, spanMap,
true,
)
count := 0
for _, id := range uncollapsed {
if id == "selected" {
count++
}
}
assert.Equal(t, 1, count, "should appear once")
}
// makeChain builds a linear trace: span0 → span1 → … → span(n-1).
// All span IDs are "span0", "span1", … so the caller can reference them by index.
func makeChain(n int) (*model.Span, map[string]*model.Span, []string) {
spans := make([]*model.Span, n)
for i := n - 1; i >= 0; i-- {
if i == n-1 {
spans[i] = mkSpan(fmt.Sprintf("span%d", i), "svc")
} else {
spans[i] = mkSpan(fmt.Sprintf("span%d", i), "svc", spans[i+1])
}
}
uncollapsed := make([]string, n)
for i := range spans {
uncollapsed[i] = fmt.Sprintf("span%d", i)
}
return spans[0], buildSpanMap(spans[0]), uncollapsed
}
// The selected span is centred: 200 spans before it, 300 after (0.4 / 0.6 split).
func TestGetSelectedSpans_WindowCentredOnSelected(t *testing.T) {
root, spanMap, uncollapsed := makeChain(600)
spans, _, _, _ := GetSelectedSpans(uncollapsed, "span300", []*model.Span{root}, spanMap, false)
assert.Equal(t, 500, len(spans), "window should be 500 spans")
// window is [100, 600): span300 lands at position 200 (300 - 100)
assert.Equal(t, "span100", spans[0].SpanID, "window starts 200 before selected")
assert.Equal(t, "span300", spans[200].SpanID, "selected span at position 200 in window")
assert.Equal(t, "span599", spans[499].SpanID, "window ends 300 after selected")
}
// When the selected span is near the start, the window shifts right so no
// negative index is used — the result is still 500 spans.
func TestGetSelectedSpans_WindowShiftsAtStart(t *testing.T) {
root, spanMap, uncollapsed := makeChain(600)
spans, _, _, _ := GetSelectedSpans(uncollapsed, "span10", []*model.Span{root}, spanMap, false)
assert.Equal(t, 500, len(spans))
assert.Equal(t, "span0", spans[0].SpanID, "window clamped to start of trace")
assert.Equal(t, "span10", spans[10].SpanID, "selected span still in window")
}
// Auto-expanded span IDs from ALL branches are returned in
// updatedUncollapsedSpans. Only internal nodes (spans with children) are
// tracked — leaf spans are never added.
//
// root (selected)
// ├─ childA (internal ✓)
// │ └─ grandchildA (internal ✓)
// │ └─ leafA (leaf ✗)
// └─ childB (internal ✓)
// └─ grandchildB (internal ✓)
// └─ leafB (leaf ✗)
func TestGetSelectedSpans_AutoExpandedSpansReturnedInUncollapsed(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc",
mkSpan("grandchildA", "svc",
mkSpan("leafA", "svc"),
),
),
mkSpan("childB", "svc",
mkSpan("grandchildB", "svc",
mkSpan("leafB", "svc"),
),
),
)
spanMap := buildSpanMap(root)
_, uncollapsed, _, _ := GetSelectedSpans([]string{}, "root", []*model.Span{root}, spanMap, true)
// all internal nodes across both branches must be tracked
assert.Contains(t, uncollapsed, "root")
assert.Contains(t, uncollapsed, "childA", "internal node depth 1, branch A")
assert.Contains(t, uncollapsed, "childB", "internal node depth 1, branch B")
assert.Contains(t, uncollapsed, "grandchildA", "internal node depth 2, branch A")
assert.Contains(t, uncollapsed, "grandchildB", "internal node depth 2, branch B")
// leaves have no children to show — never added to uncollapsedSpans
assert.NotContains(t, uncollapsed, "leafA", "leaf spans are never added to uncollapsedSpans")
assert.NotContains(t, uncollapsed, "leafB", "leaf spans are never added to uncollapsedSpans")
}
// ─────────────────────────────────────────────────────────────────────────────
// maxDepthForSelectedSpanChildren boundary tests
// ─────────────────────────────────────────────────────────────────────────────
// Depth is measured from the selected span, not the trace root.
// Ancestors appear via the path-to-root logic, not the depth limit.
// Each depth level has two children to confirm the limit is enforced on all
// branches, not just the first.
//
// root
// └─ A ancestor ✓ (path-to-root)
// └─ selected
// ├─ d1a depth 1 ✓
// │ ├─ d2a depth 2 ✓
// │ │ ├─ d3a depth 3 ✓
// │ │ │ ├─ d4a depth 4 ✓
// │ │ │ │ ├─ d5a depth 5 ✓
// │ │ │ │ │ └─ d6a depth 6 ✗
// │ │ │ │ └─ d5b depth 5 ✓
// │ │ │ └─ d4b depth 4 ✓
// │ │ └─ d3b depth 3 ✓
// │ └─ d2b depth 2 ✓
// └─ d1b depth 1 ✓
func TestGetSelectedSpans_DepthCountedFromSelectedSpan(t *testing.T) {
selected := mkSpan("selected", "svc",
mkSpan("d1a", "svc",
mkSpan("d2a", "svc",
mkSpan("d3a", "svc",
mkSpan("d4a", "svc",
mkSpan("d5a", "svc",
mkSpan("d6a", "svc"), // depth 6 — excluded
),
mkSpan("d5b", "svc"), // depth 5 — included
),
mkSpan("d4b", "svc"), // depth 4 — included
),
mkSpan("d3b", "svc"), // depth 3 — included
),
mkSpan("d2b", "svc"), // depth 2 — included
),
mkSpan("d1b", "svc"), // depth 1 — included
)
root := mkSpan("root", "svc", mkSpan("A", "svc", selected))
spanMap := buildSpanMap(root)
spans, _, _, _ := GetSelectedSpans([]string{}, "selected", []*model.Span{root}, spanMap, true)
ids := spanIDs(spans)
assert.Contains(t, ids, "root", "ancestor shown via path-to-root")
assert.Contains(t, ids, "A", "ancestor shown via path-to-root")
for _, id := range []string{"d1a", "d1b", "d2a", "d2b", "d3a", "d3b", "d4a", "d4b", "d5a", "d5b"} {
assert.Contains(t, ids, id, "depth ≤ 5 — must be included")
}
assert.NotContains(t, ids, "d6a", "depth 6 > limit — excluded")
}
func TestGetAllSpans(t *testing.T) {
root := mkSpan("root", "svc",
mkSpan("childA", "svc",
mkSpan("grandchildA", "svc",
mkSpan("leafA", "svc2"),
),
),
mkSpan("childB", "svc3",
mkSpan("grandchildB", "svc",
mkSpan("leafB", "svc2"),
),
),
)
spans, rootServiceName, rootEntryPoint := GetAllSpans([]*model.Span{root})
assert.ElementsMatch(t, spanIDs(spans), []string{"root", "childA", "grandchildA", "leafA", "childB", "grandchildB", "leafB"})
assert.Equal(t, rootServiceName, "svc")
assert.Equal(t, rootEntryPoint, "root-op")
}
func mkSpan(id, service string, children ...*model.Span) *model.Span {
return &model.Span{
SpanID: id,
ServiceName: service,
Name: id + "-op",
Children: children,
}
}
// spanIDs returns SpanIDs in order.
func spanIDs(spans []*model.Span) []string {
ids := make([]string, len(spans))
for i, s := range spans {
ids[i] = s.SpanID
}
return ids
}
// buildSpanMap indexes every span in a set of trees by SpanID.
func buildSpanMap(roots ...*model.Span) map[string]*model.Span {
m := map[string]*model.Span{}
var walk func(*model.Span)
walk = func(s *model.Span) {
m[s.SpanID] = s
for _, c := range s.Children {
walk(c)
}
}
for _, r := range roots {
walk(r)
}
return m
}

View File

@@ -43,9 +43,6 @@ type Reader interface {
// Search Interfaces
SearchTraces(ctx context.Context, params *model.SearchTracesParams) (*[]model.SearchSpansResult, error)
GetWaterfallSpansForTraceWithMetadata(ctx context.Context, orgID valuer.UUID, traceID string, req *model.GetWaterfallSpansForTraceWithMetadataParams) (*model.GetWaterfallSpansForTraceWithMetadataResponse, error)
GetFlamegraphSpansForTrace(ctx context.Context, orgID valuer.UUID, traceID string, req *model.GetFlamegraphSpansForTraceParams) (*model.GetFlamegraphSpansForTraceResponse, error)
// Setter Interfaces
SetTTL(ctx context.Context, orgID string, ttlParams *retentiontypes.TTLParams) (*retentiontypes.SetTTLResponseItem, *model.ApiError)
SetTTLV2(ctx context.Context, orgID string, params *retentiontypes.CustomRetentionTTLParams) (*retentiontypes.CustomRetentionTTLResponse, error)

View File

@@ -1,89 +0,0 @@
package model
import (
"encoding/json"
"maps"
"github.com/SigNoz/signoz/pkg/types/cachetypes"
)
type GetWaterfallSpansForTraceWithMetadataCache struct {
StartTime uint64 `json:"startTime"`
EndTime uint64 `json:"endTime"`
DurationNano uint64 `json:"durationNano"`
TotalSpans uint64 `json:"totalSpans"`
TotalErrorSpans uint64 `json:"totalErrorSpans"`
ServiceNameToTotalDurationMap map[string]uint64 `json:"serviceNameToTotalDurationMap"`
SpanIdToSpanNodeMap map[string]*Span `json:"spanIdToSpanNodeMap"`
TraceRoots []*Span `json:"traceRoots"`
HasMissingSpans bool `json:"hasMissingSpans"`
}
func (c *GetWaterfallSpansForTraceWithMetadataCache) Clone() cachetypes.Cacheable {
copyOfServiceNameToTotalDurationMap := make(map[string]uint64)
maps.Copy(copyOfServiceNameToTotalDurationMap, c.ServiceNameToTotalDurationMap)
copyOfSpanIdToSpanNodeMap := make(map[string]*Span)
maps.Copy(copyOfSpanIdToSpanNodeMap, c.SpanIdToSpanNodeMap)
copyOfTraceRoots := make([]*Span, len(c.TraceRoots))
copy(copyOfTraceRoots, c.TraceRoots)
return &GetWaterfallSpansForTraceWithMetadataCache{
StartTime: c.StartTime,
EndTime: c.EndTime,
DurationNano: c.DurationNano,
TotalSpans: c.TotalSpans,
TotalErrorSpans: c.TotalErrorSpans,
ServiceNameToTotalDurationMap: copyOfServiceNameToTotalDurationMap,
SpanIdToSpanNodeMap: copyOfSpanIdToSpanNodeMap,
TraceRoots: copyOfTraceRoots,
HasMissingSpans: c.HasMissingSpans,
}
}
func (c *GetWaterfallSpansForTraceWithMetadataCache) Cost() int64 {
const perSpanBytes = 256
return int64(c.TotalSpans) * perSpanBytes
}
func (c *GetWaterfallSpansForTraceWithMetadataCache) MarshalBinary() (data []byte, err error) {
return json.Marshal(c)
}
func (c *GetWaterfallSpansForTraceWithMetadataCache) UnmarshalBinary(data []byte) error {
return json.Unmarshal(data, c)
}
type GetFlamegraphSpansForTraceCache struct {
StartTime uint64 `json:"startTime"`
EndTime uint64 `json:"endTime"`
DurationNano uint64 `json:"durationNano"`
SelectedSpans [][]*FlamegraphSpan `json:"selectedSpans"`
TraceRoots []*FlamegraphSpan `json:"traceRoots"`
}
func (c *GetFlamegraphSpansForTraceCache) Clone() cachetypes.Cacheable {
return &GetFlamegraphSpansForTraceCache{
StartTime: c.StartTime,
EndTime: c.EndTime,
DurationNano: c.DurationNano,
SelectedSpans: c.SelectedSpans,
TraceRoots: c.TraceRoots,
}
}
func (c *GetFlamegraphSpansForTraceCache) Cost() int64 {
const perSpanBytes = 128
var spans int64
for _, row := range c.SelectedSpans {
spans += int64(len(row))
}
spans += int64(len(c.TraceRoots))
return spans * perSpanBytes
}
func (c *GetFlamegraphSpansForTraceCache) MarshalBinary() (data []byte, err error) {
return json.Marshal(c)
}
func (c *GetFlamegraphSpansForTraceCache) UnmarshalBinary(data []byte) error {
return json.Unmarshal(data, c)
}

View File

@@ -2,8 +2,6 @@ package model
import (
"time"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type InstantQueryMetricsParams struct {
@@ -331,21 +329,6 @@ type SearchTracesParams struct {
MaxSpansInTrace int `json:"maxSpansInTrace"`
}
type GetWaterfallSpansForTraceWithMetadataParams struct {
SelectedSpanID string `json:"selectedSpanId"`
IsSelectedSpanIDUnCollapsed bool `json:"isSelectedSpanIDUnCollapsed"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
Limit uint `json:"limit"`
}
type GetFlamegraphSpansForTraceParams struct {
SelectedSpanID string `json:"selectedSpanId"`
Limit uint `json:"limit"`
BoundaryStartTS uint64 `json:"boundaryStartTsMilli"`
BoundaryEndTS uint64 `json:"boundarEndTsMilli"`
SelectFields []telemetrytypes.TelemetryFieldKey `json:"selectFields"`
}
type SpanFilterParams struct {
TraceID []string `json:"traceID"`
Status []string `json:"status"`

View File

@@ -7,7 +7,6 @@ import (
"strconv"
"time"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/promql/parser"
"github.com/prometheus/prometheus/util/stats"
@@ -293,69 +292,6 @@ type Span struct {
Level uint64 `json:"level"`
}
type FlamegraphSpan struct {
TimeUnixNano uint64 `json:"timestamp"`
DurationNano uint64 `json:"durationNano"`
SpanID string `json:"spanId"`
TraceID string `json:"traceId"`
HasError bool `json:"hasError"`
ServiceName string `json:"serviceName"`
Name string `json:"name"`
Level int64 `json:"level"`
Events []Event `json:"event"`
References []OtelSpanRef `json:"references,omitempty"`
Children []*FlamegraphSpan `json:"children"`
Attributes map[string]any `json:"attributes,omitempty"`
Resource map[string]string `json:"resource,omitempty"`
}
// SetRequestedFields extracts the requested attribute/resource fields from item into s.
// This can eventually support missing fieldContext by checking both
func (s *FlamegraphSpan) SetRequestedFields(item SpanItemV2, fields []telemetrytypes.TelemetryFieldKey) {
for _, field := range fields {
switch field.FieldContext {
case telemetrytypes.FieldContextResource:
if v, ok := item.Resources_string[field.Name]; ok && v != "" {
if s.Resource == nil {
s.Resource = make(map[string]string)
}
s.Resource[field.Name] = v
}
case telemetrytypes.FieldContextAttribute:
if v := item.AttributeValue(field.Name); v != nil {
if s.Attributes == nil {
s.Attributes = make(map[string]any)
}
s.Attributes[field.Name] = v
}
}
}
}
type GetWaterfallSpansForTraceWithMetadataResponse struct {
StartTimestampMillis uint64 `json:"startTimestampMillis"`
EndTimestampMillis uint64 `json:"endTimestampMillis"`
DurationNano uint64 `json:"durationNano"`
RootServiceName string `json:"rootServiceName"`
RootServiceEntryPoint string `json:"rootServiceEntryPoint"`
TotalSpansCount uint64 `json:"totalSpansCount"`
TotalErrorSpansCount uint64 `json:"totalErrorSpansCount"`
ServiceNameToTotalDurationMap map[string]uint64 `json:"serviceNameToTotalDurationMap"`
Spans []*Span `json:"spans"`
HasMissingSpans bool `json:"hasMissingSpans"`
// this is needed for frontend and query service sync
UncollapsedSpans []string `json:"uncollapsedSpans"`
HasMore bool `json:"hasMore"`
}
type GetFlamegraphSpansForTraceResponse struct {
StartTimestampMillis uint64 `json:"startTimestampMillis"`
EndTimestampMillis uint64 `json:"endTimestampMillis"`
DurationNano uint64 `json:"durationNano"`
Spans [][]*FlamegraphSpan `json:"spans"`
HasMore bool `json:"hasMore"`
}
type OtelSpanRef struct {
TraceId string `json:"traceId,omitempty"`
SpanId string `json:"spanId,omitempty"`

View File

@@ -1,251 +0,0 @@
package spantypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/stretchr/testify/assert"
)
// mkASpan builds a WaterfallSpan with timing and field data for analytics tests.
func mkASpan(id string, resource map[string]string, attributes map[string]any, startNs, durationNs uint64) *WaterfallSpan {
return &WaterfallSpan{
SpanID: id,
Resource: resource,
Attributes: attributes,
TimeUnix: startNs,
DurationNano: durationNs,
Children: make([]*WaterfallSpan, 0),
}
}
func buildTraceFromSpans(spans ...*WaterfallSpan) *WaterfallTrace {
spanMap := make(map[string]*WaterfallSpan, len(spans))
var startTime, endTime uint64
initialized := false
for _, s := range spans {
spanMap[s.SpanID] = s
if !initialized || s.TimeUnix < startTime {
startTime = s.TimeUnix
initialized = true
}
if end := s.TimeUnix + s.DurationNano; end > endTime {
endTime = end
}
}
return NewWaterfallTrace(startTime, endTime, uint64(len(spanMap)), 0, spanMap, nil, false)
}
var (
fieldServiceName = telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextResource,
}
fieldHTTPMethod = telemetrytypes.TelemetryFieldKey{
Name: "http.method",
FieldContext: telemetrytypes.FieldContextAttribute,
}
fieldCached = telemetrytypes.TelemetryFieldKey{
Name: "db.cached",
FieldContext: telemetrytypes.FieldContextAttribute,
}
)
func TestGetSpanAggregation_SpanCount(t *testing.T) {
tests := []struct {
name string
trace *WaterfallTrace
field telemetrytypes.TelemetryFieldKey
want map[string]uint64
}{
{
name: "counts by resource field",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "frontend"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": "frontend"}, nil, 10, 5),
mkASpan("s3", map[string]string{"service.name": "backend"}, nil, 20, 8),
),
field: fieldServiceName,
want: map[string]uint64{"frontend": 2, "backend": 1},
},
{
name: "counts by string attribute field",
trace: buildTraceFromSpans(
mkASpan("s1", nil, map[string]any{"http.method": "GET"}, 0, 10),
mkASpan("s2", nil, map[string]any{"http.method": "POST"}, 10, 5),
mkASpan("s3", nil, map[string]any{"http.method": "GET"}, 20, 8),
),
field: fieldHTTPMethod,
want: map[string]uint64{"GET": 2, "POST": 1},
},
{
name: "counts by boolean attribute field",
trace: buildTraceFromSpans(
mkASpan("s1", nil, map[string]any{"db.cached": true}, 0, 10),
mkASpan("s2", nil, map[string]any{"db.cached": false}, 10, 5),
mkASpan("s3", nil, map[string]any{"db.cached": true}, 20, 8),
),
field: fieldCached,
want: map[string]uint64{"true": 2, "false": 1},
},
{
name: "spans missing the field are excluded",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "frontend"}, nil, 0, 10),
mkASpan("s2", map[string]string{}, nil, 10, 5), // no service.name
mkASpan("s3", map[string]string{"service.name": "backend"}, nil, 20, 8),
),
field: fieldServiceName,
want: map[string]uint64{"frontend": 1, "backend": 1},
},
{
// empty string is a valid field value — counted under the "" key, unlike a missing field
name: "span with empty service.name is counted under empty string key",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "frontend"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": ""}, nil, 10, 5),
mkASpan("s3", map[string]string{"service.name": "backend"}, nil, 20, 8),
),
field: fieldServiceName,
want: map[string]uint64{"frontend": 1, "backend": 1, "": 1},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
result := tc.trace.GetSpanAggregation(SpanAggregationSpanCount, tc.field)
assert.Equal(t, tc.field, result.Field)
assert.Equal(t, SpanAggregationSpanCount, result.Aggregation)
assert.Equal(t, tc.want, result.Value)
})
}
}
func TestGetSpanAggregation_Duration(t *testing.T) {
tests := []struct {
name string
trace *WaterfallTrace
field telemetrytypes.TelemetryFieldKey
want map[string]uint64
}{
{
name: "non-overlapping spans — merged equals sum",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "frontend"}, nil, 0, 100),
mkASpan("s2", map[string]string{"service.name": "frontend"}, nil, 100, 50),
mkASpan("s3", map[string]string{"service.name": "backend"}, nil, 0, 80),
),
field: fieldServiceName,
want: map[string]uint64{"frontend": 150, "backend": 80},
},
{
name: "non-overlapping attribute groups — merged equals sum",
trace: buildTraceFromSpans(
mkASpan("s1", nil, map[string]any{"http.method": "GET"}, 0, 30),
mkASpan("s2", nil, map[string]any{"http.method": "GET"}, 50, 20),
mkASpan("s3", nil, map[string]any{"http.method": "POST"}, 0, 70),
),
field: fieldHTTPMethod,
want: map[string]uint64{"GET": 50, "POST": 70},
},
{
name: "overlapping spans — non-overlapping interval merge",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "svc"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": "svc"}, nil, 5, 10),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 15}, // [0,10] [5,15] = [0,15]
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
result := tc.trace.GetSpanAggregation(SpanAggregationDuration, tc.field)
assert.Equal(t, tc.field, result.Field)
assert.Equal(t, SpanAggregationDuration, result.Aggregation)
assert.Equal(t, tc.want, result.Value)
})
}
}
func TestGetSpanAggregation_ExecutionTimePercentage(t *testing.T) {
tests := []struct {
name string
trace *WaterfallTrace
field telemetrytypes.TelemetryFieldKey
want map[string]uint64
}{
{
// trace [0,30]: svc occupies [0,10]+[20,30]=20 → 20*100/30 = 66%
name: "non-overlapping spans",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "svc"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": "svc"}, nil, 20, 10),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 66},
},
{
// trace [0,15]: svc [0,15]=15 → 100%
name: "partially overlapping spans",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "svc"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": "svc"}, nil, 5, 10),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 100},
},
{
// trace [0,20]: outer absorbs inner → 100%
name: "fully contained span",
trace: buildTraceFromSpans(
mkASpan("outer", map[string]string{"service.name": "svc"}, nil, 0, 20),
mkASpan("inner", map[string]string{"service.name": "svc"}, nil, 5, 5),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 100},
},
{
// trace [0,30]: svc [0,15]+[20,30]=25 → 25*100/30 = 83%
name: "three spans with two merges",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "svc"}, nil, 0, 10),
mkASpan("s2", map[string]string{"service.name": "svc"}, nil, 5, 10),
mkASpan("s3", map[string]string{"service.name": "svc"}, nil, 20, 10),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 83},
},
{
// trace [0,28]: frontend [0,15]=15 → 53%, backend [0,5]+[20,28]=13 → 46%
name: "independent groups are computed separately",
trace: buildTraceFromSpans(
mkASpan("a1", map[string]string{"service.name": "frontend"}, nil, 0, 10),
mkASpan("a2", map[string]string{"service.name": "frontend"}, nil, 5, 10),
mkASpan("b1", map[string]string{"service.name": "backend"}, nil, 0, 5),
mkASpan("b2", map[string]string{"service.name": "backend"}, nil, 20, 8),
),
field: fieldServiceName,
want: map[string]uint64{"frontend": 53, "backend": 46},
},
{
// trace [100,150]: svc [100,150]=50 → 100%
name: "single span",
trace: buildTraceFromSpans(
mkASpan("s1", map[string]string{"service.name": "svc"}, nil, 100, 50),
),
field: fieldServiceName,
want: map[string]uint64{"svc": 100},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
result := tc.trace.GetSpanAggregation(SpanAggregationExecutionTimePercentage, tc.field)
assert.Equal(t, tc.field, result.Field)
assert.Equal(t, SpanAggregationExecutionTimePercentage, result.Aggregation)
assert.Equal(t, tc.want, result.Value)
})
}
}

View File

@@ -40,13 +40,56 @@ func NewFlamegraphTraceFromStorable(spans []StorableSpan, selectFields []telemet
return t
}
// GetAllLevels return level wise spans using BFS on trace.
func (t *FlamegraphTrace) GetAllLevels() [][]*FlamegraphSpan {
return nil
var result [][]*FlamegraphSpan
for _, root := range t.roots {
currentLevel := []*FlamegraphSpan{root}
for depth := int64(0); len(currentLevel) > 0; depth++ {
var nextLevel []*FlamegraphSpan
for _, node := range currentLevel {
node.Level = depth
nextLevel = append(nextLevel, node.Children...)
node.Children = nil // release tree reference
}
result = append(result, currentLevel)
currentLevel = nextLevel
}
}
return result
}
// GetSelectedLevels returns the window of levels around selectedSpanID with sampling applied to dense levels.
func (t *FlamegraphTrace) GetSelectedLevels(selectedSpanID string, levelLimit, spansPerLevel, topLatencyCount, bucketCount int) []FlamegraphLevel {
return nil
allLevels := t.GetAllLevels()
selectedIndex := getLevelIndex(allLevels, selectedSpanID)
// 40% window above level with selected span and 60% below that
beforeSelectedLevel := int(float64(levelLimit) * 0.4)
startLevel := max(0, selectedIndex-beforeSelectedLevel)
endLevel := min(len(allLevels), startLevel+levelLimit)
startLevel = max(0, endLevel-levelLimit) // utilize the page size if endLevel is clamped
result := make([]FlamegraphLevel, 0, endLevel-startLevel)
for i := startLevel; i < endLevel; i++ {
spans := allLevels[i]
sampled := spans
if len(spans) > spansPerLevel {
sampled = t.sampleLevel(spans, selectedSpanID, i == selectedIndex, topLatencyCount, bucketCount)
}
if len(sampled) == 0 {
continue
}
spanIDs := make([]string, len(sampled))
for j, s := range sampled {
spanIDs[j] = s.SpanID
}
result = append(result, FlamegraphLevel{Level: spans[0].Level, SpanIDs: spanIDs})
}
return result
}
func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, fullSpans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) [][]*FlamegraphSpan {
@@ -101,6 +144,77 @@ func (t *FlamegraphTrace) buildSpanTree() {
})
}
func (t *FlamegraphTrace) sampleLevel(spans []*FlamegraphSpan, selectedSpanID string, isSelectedLevel bool, topLatencyCount, bucketCount int) []*FlamegraphSpan {
sorted := make([]*FlamegraphSpan, len(spans))
copy(sorted, spans)
sort.Slice(sorted, func(i, j int) bool {
return sorted[i].DurationNano > sorted[j].DurationNano
})
sampled := make(map[string]*FlamegraphSpan, topLatencyCount+1)
topK := min(topLatencyCount, len(sorted))
for _, span := range sorted[:topK] {
sampled[span.SpanID] = span
}
// include the selected span.
if isSelectedLevel {
for _, span := range sorted {
if span.SpanID == selectedSpanID {
sampled[span.SpanID] = span
break
}
}
}
for _, span := range t.bucketSampleSpans(sorted, bucketCount, sampled) {
sampled[span.SpanID] = span
}
result := make([]*FlamegraphSpan, 0, len(sampled))
for _, span := range sampled {
result = append(result, span)
}
return result
}
func (t *FlamegraphTrace) bucketSampleSpans(sorted []*FlamegraphSpan, bucketCount int, exclude map[string]*FlamegraphSpan) []*FlamegraphSpan {
bucketSize := (t.endTime - t.startTime) / uint64(bucketCount)
if bucketSize == 0 {
bucketSize = 1
}
buckets := make([][]*FlamegraphSpan, bucketCount)
for _, span := range sorted {
if _, ok := exclude[span.SpanID]; ok {
continue
}
if span.Timestamp < t.startTime || span.Timestamp > t.endTime {
continue
}
idx := min(int((span.Timestamp-t.startTime)/bucketSize), bucketCount-1)
if len(buckets[idx]) < 2 {
buckets[idx] = append(buckets[idx], span)
}
}
var result []*FlamegraphSpan
for _, bucket := range buckets {
result = append(result, bucket...)
}
return result
}
func getLevelIndex(levels [][]*FlamegraphSpan, spanID string) int {
for i, lvl := range levels {
for _, span := range lvl {
if span.SpanID == spanID {
return i
}
}
}
return 0
}
func flamegraphSpanIndex(spans []*FlamegraphSpan, spanID string) int {
for i, s := range spans {
if s.SpanID == spanID {

View File

@@ -0,0 +1,270 @@
package spantypes
import (
"fmt"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// mkMinimal builds a MinimalSpan. timestamp and duration are in nanoseconds.
func mkMinimal(id, parentID string, timestamp, duration uint64) MinimalSpan {
return MinimalSpan{
SpanID: id,
ParentSpanID: parentID,
StartTime: time.Unix(0, int64(timestamp)),
DurationNano: duration,
}
}
// levelIDs extracts span IDs level-by-level from GetAllLevels output.
func levelIDs(levels [][]*FlamegraphSpan) [][]string {
out := make([][]string, len(levels))
for i, lvl := range levels {
ids := make([]string, len(lvl))
for j, s := range lvl {
ids[j] = s.SpanID
}
out[i] = ids
}
return out
}
// makeChainFG builds a linear trace of n spans: span0 → span1 → … → span(n-1).
func makeChainFG(n int) *FlamegraphTrace {
spans := make([]MinimalSpan, n)
for i := range n {
parent := ""
if i > 0 {
parent = fmt.Sprintf("span%d", i-1)
}
spans[i] = mkMinimal(fmt.Sprintf("span%d", i), parent, uint64(i*100), 100)
}
return NewFlamegraphTraceFromMinimal(spans)
}
// makeBroadTrace builds a trace: root → N children (wide, one level deep).
func makeBroadTrace(n int) *FlamegraphTrace {
spans := make([]MinimalSpan, n+1)
spans[0] = mkMinimal("root", "", 0, uint64(n*100))
for i := range n {
spans[i+1] = mkMinimal(fmt.Sprintf("child%d", i), "root", uint64(i*100), 100)
}
return NewFlamegraphTraceFromMinimal(spans)
}
// ─────────────────────────────────────────────────────────────────────────────
// buildSpanTree / GetAllLevels
// ─────────────────────────────────────────────────────────────────────────────
func TestGetAllLevels(t *testing.T) {
tests := []struct {
name string
spans []MinimalSpan
check func(t *testing.T, levels [][]*FlamegraphSpan)
}{
{
// root → child → grandchild: three levels, one span each.
name: "linear_chain",
spans: []MinimalSpan{
mkMinimal("root", "", 100, 300),
mkMinimal("child", "root", 150, 200),
mkMinimal("grandchild", "child", 200, 100),
},
check: func(t *testing.T, levels [][]*FlamegraphSpan) {
assert.Equal(t, [][]string{{"root"}, {"child"}, {"grandchild"}}, levelIDs(levels))
},
},
{
// root → [A, B]: level 0 has root, level 1 has both children.
name: "two_siblings",
spans: []MinimalSpan{
mkMinimal("root", "", 100, 300),
mkMinimal("A", "root", 150, 100),
mkMinimal("B", "root", 200, 100),
},
check: func(t *testing.T, levels [][]*FlamegraphSpan) {
assert.Equal(t, []string{"root"}, levelIDs(levels)[0])
assert.ElementsMatch(t, []string{"A", "B"}, levelIDs(levels)[1])
},
},
{
// child references a non-existent parent → synthetic "Missing Span" root at level 0.
name: "missing_parent",
spans: []MinimalSpan{
mkMinimal("child", "ghost", 100, 100),
},
check: func(t *testing.T, levels [][]*FlamegraphSpan) {
assert.Len(t, levels, 2)
assert.Equal(t, "ghost", levels[0][0].SpanID)
assert.Equal(t, "Missing Span", levels[0][0].Name)
assert.Equal(t, "child", levels[1][0].SpanID)
},
},
{
// Children must be nil after BFS so the tree does not stay live in memory.
name: "children_nilled_after_bfs",
spans: []MinimalSpan{
mkMinimal("root", "", 100, 200),
mkMinimal("child", "root", 150, 100),
},
check: func(t *testing.T, levels [][]*FlamegraphSpan) {
for _, lvl := range levels {
for _, s := range lvl {
assert.Nil(t, s.Children)
}
}
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
tc.check(t, NewFlamegraphTraceFromMinimal(tc.spans).GetAllLevels())
})
}
}
// ─────────────────────────────────────────────────────────────────────────────
// GetSelectedLevels
// ─────────────────────────────────────────────────────────────────────────────
func TestGetSelectedLevels(t *testing.T) {
tests := []struct {
name string
buildTrace func() *FlamegraphTrace
selectedSpan string
levelLimit int
spansPerLevel int
topK int
bucketCount int
check func(t *testing.T, levels []FlamegraphLevel)
}{
{
// Middle: 40% above (20 levels) + 60% below (30 levels) = 50 total.
name: "window_middle",
buildTrace: func() *FlamegraphTrace { return makeChainFG(100) },
selectedSpan: "span50",
levelLimit: 50,
spansPerLevel: 1000,
topK: 5,
bucketCount: 50,
check: func(t *testing.T, levels []FlamegraphLevel) {
assert.Equal(t, 50, len(levels))
assert.Equal(t, "span30", levels[0].SpanIDs[0])
},
},
{
// Near start: clamp at 0, redistribute budget downward — still 50 levels.
name: "window_near_start",
buildTrace: func() *FlamegraphTrace { return makeChainFG(100) },
selectedSpan: "span5",
levelLimit: 50,
spansPerLevel: 1000,
topK: 5,
bucketCount: 50,
check: func(t *testing.T, levels []FlamegraphLevel) {
assert.Equal(t, 50, len(levels))
assert.Equal(t, "span0", levels[0].SpanIDs[0])
},
},
{
// Near end: clamp at total, redistribute budget upward — still 50 levels.
name: "window_near_end",
buildTrace: func() *FlamegraphTrace { return makeChainFG(100) },
selectedSpan: "span95",
levelLimit: 50,
spansPerLevel: 1000,
topK: 5,
bucketCount: 50,
check: func(t *testing.T, levels []FlamegraphLevel) {
assert.Equal(t, 50, len(levels))
assert.Equal(t, "span50", levels[0].SpanIDs[0])
},
},
{
// Unknown span ID falls back to level 0.
name: "unknown_span_defaults_to_level_zero",
buildTrace: func() *FlamegraphTrace { return makeChainFG(10) },
selectedSpan: "nonexistent",
levelLimit: 5,
spansPerLevel: 1000,
topK: 5,
bucketCount: 50,
check: func(t *testing.T, levels []FlamegraphLevel) {
assert.Equal(t, "span0", levels[0].SpanIDs[0])
},
},
{
// Dense levels are sampled down to approximately spansPerLevel.
name: "sampling_respects_cap",
buildTrace: func() *FlamegraphTrace { return makeBroadTrace(200) },
selectedSpan: "root",
levelLimit: 10,
spansPerLevel: 10,
topK: 3,
bucketCount: 5,
check: func(t *testing.T, levels []FlamegraphLevel) {
for _, lvl := range levels {
assert.LessOrEqual(t, len(lvl.SpanIDs), 10+3+5*2)
}
},
},
{
// Selected span always survives sampling even when not in topK.
name: "selected_span_always_included",
buildTrace: func() *FlamegraphTrace { return makeBroadTrace(200) },
selectedSpan: "child99",
levelLimit: 10,
spansPerLevel: 5,
topK: 3,
bucketCount: 5,
check: func(t *testing.T, levels []FlamegraphLevel) {
found := false
for _, lvl := range levels {
for _, id := range lvl.SpanIDs {
if id == "child99" {
found = true
}
}
}
assert.True(t, found, "selected span must survive sampling")
},
},
{
// Selected span is also the highest-latency span (lands in topK) — must not appear twice.
name: "no_duplicate_span_ids",
buildTrace: func() *FlamegraphTrace {
spans := make([]MinimalSpan, 201)
spans[0] = mkMinimal("root", "", 0, 10000)
for i := range 200 {
spans[i+1] = mkMinimal(fmt.Sprintf("child%d", i), "root", uint64(i*50), uint64(200-i)*10)
}
return NewFlamegraphTraceFromMinimal(spans)
},
selectedSpan: "child0",
levelLimit: 10,
spansPerLevel: 5,
topK: 3,
bucketCount: 10,
check: func(t *testing.T, levels []FlamegraphLevel) {
for _, lvl := range levels {
seen := map[string]bool{}
for _, id := range lvl.SpanIDs {
assert.False(t, seen[id], "duplicate span ID %q at level %d", id, lvl.Level)
seen[id] = true
}
}
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
// fresh trace per subtest: GetAllLevels is destructive (nils Children)
levels := tc.buildTrace().GetSelectedLevels(tc.selectedSpan, tc.levelLimit, tc.spansPerLevel, tc.topK, tc.bucketCount)
tc.check(t, levels)
})
}
}

View File

@@ -21,28 +21,13 @@ const (
// ErrTraceNotFound is returned when a trace ID has no matching spans in ClickHouse.
var ErrTraceNotFound = errors.NewNotFoundf(errors.CodeNotFound, "trace not found")
// PostableWaterfall is the request body for the v3 waterfall API.
// PostableWaterfall is the request body for the waterfall API.
type PostableWaterfall struct {
SelectedSpanID string `json:"selectedSpanId"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
Limit uint `json:"limit"`
Aggregations []SpanAggregation `json:"aggregations"`
SelectedSpanID string `json:"selectedSpanId"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
}
func (p *PostableWaterfall) Validate() error {
if len(p.Aggregations) > maxAggregationItems {
return ErrTooManyAggregationItems
}
for _, a := range p.Aggregations {
if !a.Aggregation.isValid() {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "unknown aggregation type: %q", a.Aggregation)
}
fc := a.Field.FieldContext
if fc != telemetrytypes.FieldContextResource && fc != telemetrytypes.FieldContextAttribute {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "aggregation field context must be %q or %q, got %q",
telemetrytypes.FieldContextResource, telemetrytypes.FieldContextAttribute, fc)
}
}
return nil
}

View File

@@ -8,7 +8,6 @@ import (
"time"
"github.com/SigNoz/signoz/pkg/types/cachetypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type TraceSummary struct {
@@ -29,19 +28,18 @@ type WaterfallTrace struct {
HasMissingSpans bool `json:"hasMissingSpans"`
}
// GettableWaterfallTrace is the response for the v3 waterfall API.
// GettableWaterfallTrace is the response for the waterfall API.
type GettableWaterfallTrace struct {
StartTimestampMillis uint64 `json:"startTimestampMillis"`
EndTimestampMillis uint64 `json:"endTimestampMillis"`
RootServiceName string `json:"rootServiceName"`
RootServiceEntryPoint string `json:"rootServiceEntryPoint"`
TotalSpansCount uint64 `json:"totalSpansCount"`
TotalErrorSpansCount uint64 `json:"totalErrorSpansCount"`
Spans []*WaterfallSpan `json:"spans"`
HasMissingSpans bool `json:"hasMissingSpans"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
HasMore bool `json:"hasMore"`
Aggregations []SpanAggregationResult `json:"aggregations"`
StartTimestampMillis uint64 `json:"startTimestampMillis"`
EndTimestampMillis uint64 `json:"endTimestampMillis"`
RootServiceName string `json:"rootServiceName"`
RootServiceEntryPoint string `json:"rootServiceEntryPoint"`
TotalSpansCount uint64 `json:"totalSpansCount"`
TotalErrorSpansCount uint64 `json:"totalErrorSpansCount"`
Spans []*WaterfallSpan `json:"spans"`
HasMissingSpans bool `json:"hasMissingSpans"`
UncollapsedSpans []string `json:"uncollapsedSpans"`
HasMore bool `json:"hasMore"`
}
// NewWaterfallTrace constructs a WaterfallTrace from processed span data.
@@ -122,23 +120,6 @@ func NewWaterfallTraceFromSpans(nodes []*WaterfallSpan) *WaterfallTrace {
)
}
func (wt *WaterfallTrace) GetWaterfallSpans(uncollapsedSpanIDs []string, selectedSpanID string, limit uint, spanPageSize float64, maxDepthToAutoExpand int) ([]*WaterfallSpan, []string, bool) {
// Span selection decision: all spans or windowed
selectAllSpans := wt.TotalSpans <= uint64(limit)
var (
selectedSpans []*WaterfallSpan
uncollapsedSpans []string
)
if selectAllSpans {
selectedSpans = wt.GetAllSpans()
} else {
selectedSpans, uncollapsedSpans = wt.GetSelectedSpans(uncollapsedSpanIDs, selectedSpanID, spanPageSize, maxDepthToAutoExpand)
}
return selectedSpans, uncollapsedSpans, selectAllSpans
}
// GetAllSpans returns all spans with pre order traversal.
func (wt *WaterfallTrace) GetAllSpans() []*WaterfallSpan {
var preOrderedSpans []*WaterfallSpan
@@ -237,7 +218,6 @@ func NewGettableWaterfallTrace(
selectedSpans []*WaterfallSpan,
uncollapsedSpans []string,
selectAllSpans bool,
aggregations []SpanAggregationResult,
) *GettableWaterfallTrace {
var rootServiceName, rootServiceEntryPoint string
if len(traceData.TraceRoots) > 0 {
@@ -250,15 +230,6 @@ func NewGettableWaterfallTrace(
span.TimeUnix = span.TimeUnix / 1_000_000
}
// duration values are in nanoseconds; convert in-place to milliseconds.
for i := range aggregations {
if aggregations[i].Aggregation == SpanAggregationDuration {
for k, v := range aggregations[i].Value {
aggregations[i].Value[k] = v / 1_000_000
}
}
}
return &GettableWaterfallTrace{
Spans: selectedSpans,
UncollapsedSpans: uncollapsedSpans,
@@ -270,7 +241,6 @@ func NewGettableWaterfallTrace(
RootServiceEntryPoint: rootServiceEntryPoint,
HasMissingSpans: traceData.HasMissingSpans,
HasMore: !selectAllSpans,
Aggregations: aggregations,
}
}
@@ -292,78 +262,3 @@ func windowAroundIndex(selectedIndex, total int, spanLimitPerRequest float64) (s
start = max(start, 0)
return
}
// mergeSpanIntervals computes non-overlapping execution time for a set of spans.
func mergeSpanIntervals(spans []*WaterfallSpan) uint64 {
if len(spans) == 0 {
return 0
}
sort.Slice(spans, func(i, j int) bool {
return spans[i].TimeUnix < spans[j].TimeUnix
})
currentStart := spans[0].TimeUnix
currentEnd := currentStart + spans[0].DurationNano
total := uint64(0)
for _, span := range spans[1:] {
startNano := span.TimeUnix
endNano := startNano + span.DurationNano
if currentEnd >= startNano {
if endNano > currentEnd {
currentEnd = endNano
}
} else {
total += currentEnd - currentStart
currentStart = startNano
currentEnd = endNano
}
}
return total + (currentEnd - currentStart)
}
// GetSpanAggregation computes one aggregation result over all spans in the trace.
// Duration values are returned in nanoseconds; callers convert to milliseconds as needed.
func (wt *WaterfallTrace) GetSpanAggregation(aggregation SpanAggregationType, field telemetrytypes.TelemetryFieldKey) SpanAggregationResult {
result := SpanAggregationResult{
Field: field,
Aggregation: aggregation,
Value: make(map[string]uint64),
}
switch aggregation {
case SpanAggregationSpanCount:
for _, span := range wt.SpanIDToSpanNodeMap {
if key, ok := span.FieldValue(field); ok {
result.Value[key]++
}
}
case SpanAggregationDuration:
spansByField := make(map[string][]*WaterfallSpan)
for _, span := range wt.SpanIDToSpanNodeMap {
if key, ok := span.FieldValue(field); ok {
spansByField[key] = append(spansByField[key], span)
}
}
for key, spans := range spansByField {
result.Value[key] = mergeSpanIntervals(spans)
}
case SpanAggregationExecutionTimePercentage:
traceDuration := wt.EndTime - wt.StartTime
spansByField := make(map[string][]*WaterfallSpan)
for _, span := range wt.SpanIDToSpanNodeMap {
if key, ok := span.FieldValue(field); ok {
spansByField[key] = append(spansByField[key], span)
}
}
if traceDuration > 0 {
for key, spans := range spansByField {
result.Value[key] = mergeSpanIntervals(spans) * 100 / traceDuration
}
}
}
return result
}