Compare commits

...

11 Commits

Author SHA1 Message Date
Nikhil Soni
3d2acb729c fix: update alias in order by of the query 2026-06-02 23:56:33 +05:30
Nikhil Soni
39d4a7e586 chore: update openapi specs 2026-06-02 23:07:16 +05:30
Nikhil Soni
acbdb9dc0c chore: remove service name root level field from flamegraph span 2026-06-02 22:56:55 +05:30
Nikhil Soni
70532d51e6 refactor: switch to using group by instead of distinct on
Since group by is faster is enough no. of threads are used
2026-06-02 22:51:29 +05:30
Nikhil Soni
885a0cbee9 chore: mark all fields in response as required 2026-06-02 22:39:32 +05:30
Nikhil Soni
1b72167d01 chore: update openapi specs 2026-06-02 22:27:51 +05:30
Nikhil Soni
bad6cb79aa chore: make array fields required and non nullable 2026-06-02 22:27:44 +05:30
Nikhil Soni
d8c2ac577f feat: return selected fields only in flamegraph
And reduce the no. of fields scanned from db
2026-06-02 19:04:41 +05:30
Nikhil Soni
73a0d72136 chore: avoid passing request type to module 2026-06-02 17:30:49 +05:30
Nikhil Soni
abf61470b0 chore: remove un related changes to keep diff minimum 2026-06-02 16:58:48 +05:30
Nikhil Soni
80b2520e44 chore: move flamegraph after aggregation to reduce diff 2026-06-02 16:51:46 +05:30
10 changed files with 162 additions and 96 deletions

View File

@@ -6527,7 +6527,6 @@ components:
event:
items:
$ref: '#/components/schemas/SpantypesEvent'
nullable: true
type: array
hasError:
type: boolean
@@ -6542,13 +6541,13 @@ components:
additionalProperties:
type: string
type: object
serviceName:
type: string
spanId:
type: string
timestamp:
minimum: 0
type: integer
required:
- event
type: object
SpantypesGettableFlamegraphTrace:
properties:
@@ -6562,11 +6561,15 @@ components:
items:
$ref: '#/components/schemas/SpantypesFlamegraphSpan'
type: array
nullable: true
type: array
startTimestampMillis:
format: int64
type: integer
required:
- spans
- startTimestampMillis
- endTimestampMillis
- hasMore
type: object
SpantypesGettableSpanMapperGroups:
properties:

View File

@@ -7690,9 +7690,9 @@ export interface SpantypesFlamegraphSpanDTO {
*/
durationNano?: number;
/**
* @type array,null
* @type array
*/
event?: SpantypesEventDTO[] | null;
event: SpantypesEventDTO[];
/**
* @type boolean
*/
@@ -7714,10 +7714,6 @@ export interface SpantypesFlamegraphSpanDTO {
* @type object
*/
resource?: SpantypesFlamegraphSpanDTOResource;
/**
* @type string
*/
serviceName?: string;
/**
* @type string
*/
@@ -7734,20 +7730,20 @@ export interface SpantypesGettableFlamegraphTraceDTO {
* @type integer
* @format int64
*/
endTimestampMillis?: number;
endTimestampMillis: number;
/**
* @type boolean
*/
hasMore?: boolean;
hasMore: boolean;
/**
* @type array,null
* @type array
*/
spans?: SpantypesFlamegraphSpanDTO[][] | null;
spans: SpantypesFlamegraphSpanDTO[][];
/**
* @type integer
* @format int64
*/
startTimestampMillis?: number;
startTimestampMillis: number;
}
export type SpantypesSpanMapperGroupConditionDTOAnyOf = {

View File

@@ -87,7 +87,8 @@ func (h *handler) GetFlamegraph(rw http.ResponseWriter, r *http.Request) {
render.Error(rw, err)
return
}
result, err := h.module.GetFlamegraph(r.Context(), mux.Vars(r)["traceID"], req)
result, err := h.module.GetFlamegraph(r.Context(), mux.Vars(r)["traceID"], req.SelectedSpanID, req.SelectFields)
if err != nil {
render.Error(rw, err)
return

View File

@@ -7,6 +7,7 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/tracedetail"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type module struct {
@@ -46,6 +47,29 @@ func (m *module) GetWaterfall(ctx context.Context, traceID string, req *spantype
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, uncollapsedSpans, selectedAllSpans, aggregationResults), nil
}
// getTraceData fetches all spans for a trace and builds the WaterfallTrace.
func (m *module) getTraceData(ctx context.Context, traceID string) (*spantypes.WaterfallTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
spanItems, err := m.store.GetTraceSpans(ctx, traceID, summary)
if err != nil {
return nil, err
}
if len(spanItems) == 0 {
return nil, spantypes.ErrTraceNotFound
}
nodes := make([]*spantypes.WaterfallSpan, len(spanItems))
for i := range spanItems {
nodes[i] = spanItems[i].ToWaterfallSpan(traceID)
}
return spantypes.NewWaterfallTraceFromSpans(nodes), nil
}
// GetWaterfallV4 is the OOM-safe V4 waterfall.
// For large traces (NumSpans > effectiveLimit) it uses a two-step fetch:
// minimal fields for all spans to build the tree, then full fields for the
@@ -62,15 +86,24 @@ func (m *module) GetWaterfallV4(ctx context.Context, traceID string, selectedSpa
return m.getFullWaterfall(ctx, traceID, summary)
}
func (m *module) GetFlamegraph(ctx context.Context, traceID string, req *spantypes.PostableFlamegraph) (*spantypes.GettableFlamegraphTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
func (m *module) getFullWaterfall(ctx context.Context, traceID string, summary *spantypes.TraceSummary) (*spantypes.GettableWaterfallTrace, error) {
spanItems, err := m.store.GetTraceSpans(ctx, traceID, summary)
if err != nil {
return nil, err
}
if summary.NumSpans <= uint64(m.config.Flamegraph.SelectAllSpansLimit) {
return m.getFullFlamegraph(ctx, traceID, summary)
if len(spanItems) == 0 {
return nil, spantypes.ErrTraceNotFound
}
return m.getWindowedFlamegraph(ctx, traceID, req.SelectedSpanID, summary)
nodes := make([]*spantypes.WaterfallSpan, len(spanItems))
for i := range spanItems {
nodes[i] = spanItems[i].ToWaterfallSpan(traceID)
}
waterfallTrace := spantypes.NewWaterfallTraceFromSpans(nodes)
selectedSpans := waterfallTrace.GetAllSpans()
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, nil, true, nil), nil
}
func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error) {
@@ -78,6 +111,7 @@ func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *
if err != nil {
return nil, err
}
traceDurationNs := uint64(summary.End.UnixNano()) - uint64(summary.Start.UnixNano())
results := make([]spantypes.SpanAggregationResult, 0, len(req.Aggregations))
@@ -115,47 +149,15 @@ func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *
return &spantypes.GettableTraceAggregations{Aggregations: results}, nil
}
// getTraceData fetches all spans for a trace and builds the WaterfallTrace.
func (m *module) getTraceData(ctx context.Context, traceID string) (*spantypes.WaterfallTrace, error) {
func (m *module) GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
spanItems, err := m.store.GetTraceSpans(ctx, traceID, summary)
if err != nil {
return nil, err
if summary.NumSpans <= uint64(m.config.Flamegraph.SelectAllSpansLimit) {
return m.getFullFlamegraph(ctx, traceID, summary, selectFields)
}
if len(spanItems) == 0 {
return nil, spantypes.ErrTraceNotFound
}
nodes := make([]*spantypes.WaterfallSpan, len(spanItems))
for i := range spanItems {
nodes[i] = spanItems[i].ToWaterfallSpan(traceID)
}
return spantypes.NewWaterfallTraceFromSpans(nodes), nil
}
func (m *module) getFullWaterfall(ctx context.Context, traceID string, summary *spantypes.TraceSummary) (*spantypes.GettableWaterfallTrace, error) {
spanItems, err := m.store.GetTraceSpans(ctx, traceID, summary)
if err != nil {
return nil, err
}
if len(spanItems) == 0 {
return nil, spantypes.ErrTraceNotFound
}
nodes := make([]*spantypes.WaterfallSpan, len(spanItems))
for i := range spanItems {
nodes[i] = spanItems[i].ToWaterfallSpan(traceID)
}
waterfallTrace := spantypes.NewWaterfallTraceFromSpans(nodes)
selectedSpans := waterfallTrace.GetAllSpans()
return spantypes.NewGettableWaterfallTrace(waterfallTrace, selectedSpans, nil, true, nil), nil
return m.getWindowedFlamegraph(ctx, traceID, selectedSpanID, summary, selectFields)
}
// getWindowedWaterfall builds the waterfall tree with minimal data and then returns only a window of full spans.
@@ -199,23 +201,20 @@ func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpan
), nil
}
func (m *module) getFullFlamegraph(ctx context.Context, traceID string, summary *spantypes.TraceSummary) (*spantypes.GettableFlamegraphTrace, error) {
fullSpans, err := m.store.GetTraceSpans(ctx, traceID, summary)
func (m *module) getFullFlamegraph(ctx context.Context, traceID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, nil)
if err != nil {
return nil, err
}
if len(fullSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
flamegraphTrace := spantypes.NewFlamegraphTraceFromStorable(fullSpans)
return spantypes.NewGettableFlamegraphTrace(
flamegraphTrace.GetAllLevels(),
summary.Start.UnixMilli(), summary.End.UnixMilli(), false,
), nil
flamegraphTrace := spantypes.NewFlamegraphTraceFromStorable(fullSpans, selectFields)
return spantypes.NewGettableFlamegraphTrace(flamegraphTrace.GetAllLevels(), summary.Start.UnixMilli(), summary.End.UnixMilli(), false), nil
}
// getWindowedFlamegraph returns a window of a max levels and max sampled spans per level around the selected span.
func (m *module) getWindowedFlamegraph(ctx context.Context, traceID, selectedSpanID string, summary *spantypes.TraceSummary) (*spantypes.GettableFlamegraphTrace, error) {
func (m *module) getWindowedFlamegraph(ctx context.Context, traceID, selectedSpanID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
minimalSpans, err := m.store.GetMinimalSpans(ctx, traceID, summary.Start, summary.End)
if err != nil {
return nil, err
@@ -228,20 +227,20 @@ func (m *module) getWindowedFlamegraph(ctx context.Context, traceID, selectedSpa
minimalSpans = nil //nolint:ineffassign,wastedassign // release backing array before further db calls
cfg := m.config.Flamegraph
selectedSpans := flamegraphTrace.GetSelectedLevels(selectedSpanID,
cfg.MaxSelectedLevels, cfg.MaxSpansPerLevel, cfg.SamplingTopLatencySpansCount, cfg.SamplingBucketCount)
selectedSpans := flamegraphTrace.GetSelectedLevels(selectedSpanID, cfg.MaxSelectedLevels, cfg.MaxSpansPerLevel, cfg.SamplingTopLatencySpansCount, cfg.SamplingBucketCount)
if len(selectedSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
fullSpans, err := m.store.GetTraceSpansByIDs(ctx, traceID, summary.Start, summary.End,
spantypes.FlamegraphWindowSpanIDs(selectedSpans))
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, spantypes.FlamegraphWindowSpanIDs(selectedSpans))
if err != nil {
return nil, err
}
return spantypes.NewGettableFlamegraphTrace(
flamegraphTrace.EnrichSelectedSpans(selectedSpans, fullSpans),
summary.Start.UnixMilli(), summary.End.UnixMilli(), true,
flamegraphTrace.EnrichSelectedSpans(selectedSpans, fullSpans, selectFields),
summary.Start.UnixMilli(),
summary.End.UnixMilli(),
true,
), nil
}

View File

@@ -154,6 +154,46 @@ func (s *traceStore) GetTraceSpansByIDs(ctx context.Context, traceID string, sta
return spans, nil
}
func (s *traceStore) GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]spantypes.StorableSpan, error) {
sb := sqlbuilder.NewSelectBuilder()
sb.Select(
"span_id",
"any(parent_span_id) AS parent_span_id",
"any(timestamp) AS timestamp",
"any(duration_nano) AS duration_nano",
"any(has_error) AS has_error",
"any(name) AS name",
"any(events) AS events",
"any(attributes_string) AS attributes_string",
"any(attributes_number) AS attributes_number",
"any(attributes_bool) AS attributes_bool",
"any(resources_string) AS resources_string",
)
sb.From(fmt.Sprintf("%s.%s", spantypes.TraceDB, spantypes.TraceTable))
conditions := []string{
sb.E("trace_id", traceID),
sb.GE("ts_bucket_start", start.Unix()-1800),
sb.LE("ts_bucket_start", end.Unix()),
}
if len(spanIDs) > 0 {
ids := make([]any, len(spanIDs))
for i, id := range spanIDs {
ids[i] = id
}
conditions = append(conditions, sb.In("span_id", ids...))
}
sb.Where(conditions...)
sb.GroupBy("span_id")
sb.OrderBy("timestamp ASC", "name ASC")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
var spans []spantypes.StorableSpan
if err := s.telemetryStore.ClickhouseDB().Select(ctx, &spans, query, args...); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "error querying flamegraph spans")
}
return spans, nil
}
func (s *traceStore) GetSpanCountByField(ctx context.Context, traceID string, summary *spantypes.TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error) {
fieldExpr, err := buildFieldExpr(fieldKey)
if err != nil {

View File

@@ -5,6 +5,7 @@ import (
"net/http"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// Handler exposes HTTP handlers for trace detail APIs.
@@ -20,5 +21,5 @@ type Module interface {
GetWaterfall(ctx context.Context, traceID string, req *spantypes.PostableWaterfall) (*spantypes.GettableWaterfallTrace, error)
GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string, selectAllLimit uint) (*spantypes.GettableWaterfallTrace, error)
GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error)
GetFlamegraph(ctx context.Context, traceID string, req *spantypes.PostableFlamegraph) (*spantypes.GettableFlamegraphTrace, error)
GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error)
}

View File

@@ -1,8 +1,6 @@
package spantypes
import (
"maps"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
@@ -12,10 +10,9 @@ type FlamegraphSpan struct {
Timestamp uint64 `json:"timestamp"`
DurationNano uint64 `json:"durationNano"`
HasError bool `json:"hasError"`
ServiceName string `json:"serviceName"`
Name string `json:"name"`
Level int64 `json:"level"`
Events []Event `json:"event"`
Events []Event `json:"event" required:"true" nullable:"false"`
Attributes map[string]any `json:"attributes,omitempty"`
Resource map[string]string `json:"resource,omitempty"`
Children []*FlamegraphSpan `json:"-"` // internal tree use only
@@ -34,10 +31,10 @@ type PostableFlamegraph struct {
// GettableFlamegraphTrace is the response for the v3 flamegraph API.
type GettableFlamegraphTrace struct {
Spans [][]*FlamegraphSpan `json:"spans"`
StartTimestampMillis int64 `json:"startTimestampMillis"`
EndTimestampMillis int64 `json:"endTimestampMillis"`
HasMore bool `json:"hasMore"`
Spans [][]*FlamegraphSpan `json:"spans" required:"true" nullable:"false"`
StartTimestampMillis int64 `json:"startTimestampMillis" required:"true"`
EndTimestampMillis int64 `json:"endTimestampMillis" required:"true"`
HasMore bool `json:"hasMore" required:"true"`
}
func NewGettableFlamegraphTrace(spans [][]*FlamegraphSpan, startMs, endMs int64, hasMore bool) *GettableFlamegraphTrace {
@@ -49,22 +46,39 @@ func NewGettableFlamegraphTrace(spans [][]*FlamegraphSpan, startMs, endMs int64,
}
}
func NewFlamegraphSpanFromStorable(s *StorableSpan, level int64) *FlamegraphSpan {
resources := make(map[string]string, len(s.ResourcesString))
maps.Copy(resources, s.ResourcesString)
return &FlamegraphSpan{
func NewFlamegraphSpanFromStorable(s *StorableSpan, level int64, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphSpan {
span := &FlamegraphSpan{
SpanID: s.SpanID,
ParentSpanID: s.ParentSpanID,
Timestamp: uint64(s.StartTime.UnixNano()),
DurationNano: s.DurationNano,
HasError: s.HasError,
ServiceName: s.ServiceName,
Name: s.Name,
Level: level,
Events: s.UnmarshalledEvents(),
Attributes: s.Attributes(),
Resource: resources,
}
if len(selectFields) == 0 {
return span
}
for _, field := range selectFields {
switch field.FieldContext {
case telemetrytypes.FieldContextResource:
if v, ok := s.ResourcesString[field.Name]; ok && v != "" {
if span.Resource == nil {
span.Resource = make(map[string]string)
}
span.Resource[field.Name] = v
}
case telemetrytypes.FieldContextAttribute:
if v := s.AttributeValue(field.Name); v != nil {
if span.Attributes == nil {
span.Attributes = make(map[string]any)
}
span.Attributes[field.Name] = v
}
}
}
return span
}
// FlamegraphWindowSpanIDs collects all span IDs from a level window into a flat slice.

View File

@@ -2,6 +2,8 @@ package spantypes
import (
"sort"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// FlamegraphTrace holds the level wise tree built from minimal spans.
@@ -25,12 +27,12 @@ func NewFlamegraphTraceFromMinimal(spans []MinimalSpan) *FlamegraphTrace {
return t
}
func NewFlamegraphTraceFromStorable(spans []StorableSpan) *FlamegraphTrace {
func NewFlamegraphTraceFromStorable(spans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphTrace {
t := &FlamegraphTrace{
nodeByID: make(map[string]*FlamegraphSpan, len(spans)),
}
for i := range spans {
node := NewFlamegraphSpanFromStorable(&spans[i], 0) // level is set later by BFS
node := NewFlamegraphSpanFromStorable(&spans[i], 0, selectFields) // level is set later by BFS
t.updateTimeRange(node.Timestamp, node.DurationNano)
t.nodeByID[node.SpanID] = node
}
@@ -43,14 +45,11 @@ func (t *FlamegraphTrace) GetAllLevels() [][]*FlamegraphSpan {
}
// GetSelectedLevels returns the window of levels around selectedSpanID with sampling applied to dense levels.
func (t *FlamegraphTrace) GetSelectedLevels(
selectedSpanID string,
levelLimit, spansPerLevel, topLatencyCount, bucketCount int,
) []FlamegraphLevel {
func (t *FlamegraphTrace) GetSelectedLevels(selectedSpanID string, levelLimit, spansPerLevel, topLatencyCount, bucketCount int) []FlamegraphLevel {
return nil
}
func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, fullSpans []StorableSpan) [][]*FlamegraphSpan {
func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, fullSpans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) [][]*FlamegraphSpan {
fullByID := make(map[string]*StorableSpan, len(fullSpans))
for i := range fullSpans {
fullByID[fullSpans[i].SpanID] = &fullSpans[i]
@@ -61,7 +60,7 @@ func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, f
result[i] = make([]*FlamegraphSpan, 0, len(lvl.SpanIDs))
for _, spanID := range lvl.SpanIDs {
if full, ok := fullByID[spanID]; ok {
result[i] = append(result[i], NewFlamegraphSpanFromStorable(full, lvl.Level))
result[i] = append(result[i], NewFlamegraphSpanFromStorable(full, lvl.Level, selectFields))
} else if lean, ok := t.nodeByID[spanID]; ok {
result[i] = append(result[i], lean)
}

View File

@@ -30,6 +30,7 @@ type TraceStore interface {
GetTraceSpans(ctx context.Context, traceID string, summary *TraceSummary) ([]StorableSpan, error)
GetMinimalSpans(ctx context.Context, traceID string, start, end time.Time) ([]MinimalSpan, error)
GetTraceSpansByIDs(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetSpanCountByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)
GetSpanDurationByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)

View File

@@ -171,7 +171,6 @@ func (item *MinimalSpan) ToFlamegraphSpan() *FlamegraphSpan {
Timestamp: uint64(item.StartTime.UnixNano()),
DurationNano: item.DurationNano,
HasError: item.HasError,
ServiceName: item.ServiceName,
Children: make([]*FlamegraphSpan, 0),
}
}
@@ -279,6 +278,19 @@ func (ws *WaterfallSpan) getPathToSelectedSpanID(selectedSpanID string) ([]strin
return nil, false
}
func (item *StorableSpan) AttributeValue(name string) any {
if v, ok := item.AttributesString[name]; ok {
return v
}
if v, ok := item.AttributesNumber[name]; ok {
return v
}
if v, ok := item.AttributesBool[name]; ok {
return v
}
return nil
}
func (item *StorableSpan) Attributes() map[string]any {
attributes := make(map[string]any, len(item.AttributesString)+len(item.AttributesNumber)+len(item.AttributesBool))
for k, v := range item.AttributesString {