Compare commits

...

2 Commits

Author SHA1 Message Date
SagarRajput-7
2e8aa0f42d fix(sso): strip conditional google auth and role mapping fields from save payload (#11613)
Some checks are pending
build-staging / prepare (push) Waiting to run
build-staging / js-build (push) Blocked by required conditions
build-staging / go-build (push) Blocked by required conditions
build-staging / staging (push) Blocked by required conditions
Release Drafter / update_release_draft (push) Waiting to run
2026-06-08 20:16:19 +00:00
Nikhil Soni
c688170a13 feat(trace-detail): add flamegraph v3 with optimized memory consumption (part 1) (#11462)
* feat: add types for flamegraph v3 in module structure

* chore: remove limit from request payload

It's a new api so doesn't need to be backward compatible

* feat: add config for flamegraph

* feat: add method to enrich selected spans

* feat: add api and module for flamegraph v3

* feat: query full spans for smaller traces

* chore: move exported methods to the top

* chore: ignore nil assigment lint error

* chore: extract out flamegraph building logic for easier review

* chore: update openapi specs

* chore: move flamegraph after aggregation to reduce diff

* chore: remove un related changes to keep diff minimum

* chore: avoid passing request type to module

* feat: return selected fields only in flamegraph

And reduce the no. of fields scanned from db

* chore: make array fields required and non nullable

* chore: update openapi specs

* chore: mark all fields in response as required

* refactor: switch to using group by instead of distinct on

Since group by is faster is enough no. of threads are used

* chore: remove service name root level field from flamegraph span

* chore: update openapi specs

* fix: update alias in order by of the query

* chore: remove unnecessary nil check

* fix: set empty array for missing data

* chore: use single line for error creation

* chore: mark response fields as required

* chore: update openapi specs

* chore: add test to verify the flamegraph query sql

* chore: update openapi specs

* chore: use orderbyasc instead of order by
2026-06-08 17:29:30 +00:00
17 changed files with 990 additions and 5 deletions

View File

@@ -440,6 +440,17 @@ traces:
max_depth_to_auto_expand: 5
# Threshold below which all spans are returned without windowing.
max_limit_to_select_all_spans: 10000
flamegraph:
# Maximum number of BFS depth levels included in a windowed response.
max_selected_levels: 50
# Maximum spans per level before sampling is applied.
max_spans_per_level: 100
# Number of highest-latency spans always included when sampling a level.
sampling_top_latency_count: 5
# Number of timestamp buckets used for uniform sampling within a level.
sampling_bucket_count: 50
# Threshold below which all spans are returned without windowing or sampling.
select_all_spans_limit: 100000
##################### Authz #################################
authz:

View File

@@ -6638,6 +6638,70 @@ components:
- attribute
- resource
type: string
SpantypesFlamegraphSpan:
properties:
attributes:
additionalProperties: {}
type: object
durationNano:
minimum: 0
type: integer
event:
items:
$ref: '#/components/schemas/SpantypesEvent'
type: array
hasError:
type: boolean
level:
format: int64
type: integer
name:
type: string
parentSpanId:
type: string
resource:
additionalProperties:
type: string
type: object
spanId:
type: string
timestamp:
minimum: 0
type: integer
required:
- spanId
- parentSpanId
- timestamp
- durationNano
- hasError
- name
- level
- event
- attributes
- resource
type: object
SpantypesGettableFlamegraphTrace:
properties:
endTimestampMillis:
format: int64
type: integer
hasMore:
type: boolean
spans:
items:
items:
$ref: '#/components/schemas/SpantypesFlamegraphSpan'
type: array
type: array
startTimestampMillis:
format: int64
type: integer
required:
- spans
- startTimestampMillis
- endTimestampMillis
- hasMore
type: object
SpantypesGettableSpanMapperGroups:
properties:
items:
@@ -6703,6 +6767,15 @@ components:
traceId:
type: string
type: object
SpantypesPostableFlamegraph:
properties:
selectFields:
items:
$ref: '#/components/schemas/TelemetrytypesTelemetryFieldKey'
type: array
selectedSpanId:
type: string
type: object
SpantypesPostableSpanMapper:
properties:
config:
@@ -20535,6 +20608,75 @@ paths:
summary: Put profile in Zeus for a deployment.
tags:
- zeus
/api/v3/traces/{traceID}/flamegraph:
post:
deprecated: false
description: Returns the flamegraph view of spans for a given trace ID.
operationId: GetFlamegraph
parameters:
- in: path
name: traceID
required: true
schema:
type: string
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/SpantypesPostableFlamegraph'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/SpantypesGettableFlamegraphTrace'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"404":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Not Found
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: Get flamegraph view for a trace
tags:
- tracedetail
/api/v3/traces/{traceID}/waterfall:
post:
deprecated: false

View File

@@ -7769,6 +7769,77 @@ export enum SpantypesFieldContextDTO {
attribute = 'attribute',
resource = 'resource',
}
export type SpantypesFlamegraphSpanDTOAttributes = { [key: string]: unknown };
export type SpantypesFlamegraphSpanDTOResource = { [key: string]: string };
export interface SpantypesFlamegraphSpanDTO {
/**
* @type object
*/
attributes: SpantypesFlamegraphSpanDTOAttributes;
/**
* @type integer
* @minimum 0
*/
durationNano: number;
/**
* @type array
*/
event: SpantypesEventDTO[];
/**
* @type boolean
*/
hasError: boolean;
/**
* @type integer
* @format int64
*/
level: number;
/**
* @type string
*/
name: string;
/**
* @type string
*/
parentSpanId: string;
/**
* @type object
*/
resource: SpantypesFlamegraphSpanDTOResource;
/**
* @type string
*/
spanId: string;
/**
* @type integer
* @minimum 0
*/
timestamp: number;
}
export interface SpantypesGettableFlamegraphTraceDTO {
/**
* @type integer
* @format int64
*/
endTimestampMillis: number;
/**
* @type boolean
*/
hasMore: boolean;
/**
* @type array
*/
spans: SpantypesFlamegraphSpanDTO[][];
/**
* @type integer
* @format int64
*/
startTimestampMillis: number;
}
export type SpantypesSpanMapperGroupConditionDTOAnyOf = {
/**
* @type array,null
@@ -8070,6 +8141,17 @@ export interface SpantypesGettableWaterfallTraceDTO {
uncollapsedSpans?: string[] | null;
}
export interface SpantypesPostableFlamegraphDTO {
/**
* @type array
*/
selectFields?: TelemetrytypesTelemetryFieldKeyDTO[];
/**
* @type string
*/
selectedSpanId?: string;
}
export enum SpantypesSpanMapperOperationDTO {
move = 'move',
copy = 'copy',
@@ -10424,6 +10506,17 @@ export type GetHosts200 = {
status: string;
};
export type GetFlamegraphPathParameters = {
traceID: string;
};
export type GetFlamegraph200 = {
data: SpantypesGettableFlamegraphTraceDTO;
/**
* @type string
*/
status: string;
};
export type GetWaterfallPathParameters = {
traceID: string;
};

View File

@@ -12,6 +12,8 @@ import type {
} from 'react-query';
import type {
GetFlamegraph200,
GetFlamegraphPathParameters,
GetTraceAggregations200,
GetTraceAggregationsPathParameters,
GetWaterfall200,
@@ -19,6 +21,7 @@ import type {
GetWaterfallV4200,
GetWaterfallV4PathParameters,
RenderErrorResponseDTO,
SpantypesPostableFlamegraphDTO,
SpantypesPostableTraceAggregationsDTO,
SpantypesPostableWaterfallDTO,
} from '../sigNoz.schemas';
@@ -126,6 +129,105 @@ export const useGetTraceAggregations = <
> => {
return useMutation(getGetTraceAggregationsMutationOptions(options));
};
/**
* Returns the flamegraph view of spans for a given trace ID.
* @summary Get flamegraph view for a trace
*/
export const getFlamegraph = (
{ traceID }: GetFlamegraphPathParameters,
spantypesPostableFlamegraphDTO?: BodyType<SpantypesPostableFlamegraphDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<GetFlamegraph200>({
url: `/api/v3/traces/${traceID}/flamegraph`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: spantypesPostableFlamegraphDTO,
signal,
});
};
export const getGetFlamegraphMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
> => {
const mutationKey = ['getFlamegraph'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof getFlamegraph>>,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
}
> = (props) => {
const { pathParams, data } = props ?? {};
return getFlamegraph(pathParams, data);
};
return { mutationFn, ...mutationOptions };
};
export type GetFlamegraphMutationResult = NonNullable<
Awaited<ReturnType<typeof getFlamegraph>>
>;
export type GetFlamegraphMutationBody =
| BodyType<SpantypesPostableFlamegraphDTO>
| undefined;
export type GetFlamegraphMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary Get flamegraph view for a trace
*/
export const useGetFlamegraph = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof getFlamegraph>>,
TError,
{
pathParams: GetFlamegraphPathParameters;
data?: BodyType<SpantypesPostableFlamegraphDTO>;
},
TContext
> => {
return useMutation(getGetFlamegraphMutationOptions(options));
};
/**
* Returns the waterfall view of spans for a given trace ID with tree structure, metadata, and windowed pagination
* @summary Get waterfall view for a trace

View File

@@ -96,14 +96,28 @@ function CreateOrEdit(props: CreateOrEditProps): JSX.Element {
return undefined;
}
const { domainToAdminEmailList, ...rest } = config;
const {
domainToAdminEmailList,
allowedGroups,
serviceAccountJson,
domainToAdminEmail: _domainToAdminEmail,
fetchTransitiveGroupMembership,
...rest
} = config;
const domainToAdminEmail = convertDomainMappingsToRecord(
domainToAdminEmailList,
);
return {
...rest,
domainToAdminEmail: domainToAdminEmail ?? {},
...(rest.fetchGroups
? {
allowedGroups,
serviceAccountJson,
domainToAdminEmail: domainToAdminEmail ?? {},
fetchTransitiveGroupMembership,
}
: { domainToAdminEmail: {} }),
};
}, [form]);
@@ -129,7 +143,7 @@ function CreateOrEdit(props: CreateOrEditProps): JSX.Element {
return {
...rest,
groupMappings: groupMappings ?? {},
groupMappings: rest.useRoleAttribute ? undefined : (groupMappings ?? {}),
};
}, [form]);

View File

@@ -0,0 +1,195 @@
import { fireEvent, render, screen, waitFor } from 'tests/test-utils';
import { rest, server } from 'mocks-server/server';
import { AuthtypesGettableAuthDomainDTO } from 'api/generated/services/sigNoz.schemas';
import CreateEdit from '../CreateEdit/CreateEdit';
import {
AUTH_DOMAINS_UPDATE_ENDPOINT,
mockDomainWithRoleMapping,
mockGoogleAuthDomain,
mockGoogleAuthWithWorkspaceGroups,
mockOidcWithClaimMapping,
mockSamlWithAttributeMapping,
mockUpdateSuccessResponse,
} from './mocks';
// @signozhq/ui/button internal effects block form.validateFields() in tests
jest.mock('@signozhq/ui/button', () => ({
...jest.requireActual('@signozhq/ui/button'),
Button: ({
children,
onClick,
loading,
disabled,
'aria-label': ariaLabel,
prefix,
suffix,
}: {
children?: React.ReactNode;
onClick?: React.MouseEventHandler<HTMLButtonElement>;
loading?: boolean;
disabled?: boolean;
'aria-label'?: string;
prefix?: React.ReactNode;
suffix?: React.ReactNode;
}) => (
<button
type="button"
onClick={onClick}
disabled={disabled || loading}
aria-label={ariaLabel}
>
{prefix}
{children}
{suffix}
</button>
),
}));
type SavedPayload = {
config: {
googleAuthConfig?: Record<string, unknown>;
samlConfig?: Record<string, unknown>;
oidcConfig?: Record<string, unknown>;
roleMapping?: Record<string, unknown>;
};
};
async function submitForm(
record: AuthtypesGettableAuthDomainDTO,
): Promise<SavedPayload> {
const requests: SavedPayload[] = [];
server.use(
rest.put(AUTH_DOMAINS_UPDATE_ENDPOINT, async (req, res, ctx) => {
requests.push((await req.json()) as SavedPayload);
return res(ctx.status(200), ctx.json(mockUpdateSuccessResponse));
}),
);
render(<CreateEdit isCreate={false} record={record} onClose={jest.fn()} />);
fireEvent.click(screen.getByRole('button', { name: /save changes/i }));
await waitFor(() => expect(requests).toHaveLength(1));
return requests[0];
}
describe('CreateEdit — payload sanitization', () => {
afterEach(() => server.resetHandlers());
describe('Google Auth', () => {
it('sends core fields and omits workspace fields when fetchGroups is not set', async () => {
const payload = await submitForm(mockGoogleAuthDomain);
const g = payload.config.googleAuthConfig;
expect(g?.clientId).toBe('test-client-id');
expect(g?.clientSecret).toBe('test-client-secret');
expect(g?.allowedGroups).toBeUndefined();
expect(g?.serviceAccountJson).toBeUndefined();
expect(g?.fetchTransitiveGroupMembership).toBeUndefined();
expect(g?.domainToAdminEmail).toStrictEqual({});
});
it('strips workspace fields when fetchGroups is false', async () => {
const payload = await submitForm({
...mockGoogleAuthWithWorkspaceGroups,
config: {
...mockGoogleAuthWithWorkspaceGroups.config,
googleAuthConfig: {
...mockGoogleAuthWithWorkspaceGroups.config?.googleAuthConfig,
fetchGroups: false,
},
},
});
const g = payload.config.googleAuthConfig;
expect(g?.fetchGroups).toBe(false);
expect(g?.allowedGroups).toBeUndefined();
expect(g?.serviceAccountJson).toBeUndefined();
expect(g?.fetchTransitiveGroupMembership).toBeUndefined();
expect(g?.domainToAdminEmail).toStrictEqual({});
});
it('includes all workspace fields when fetchGroups is true', async () => {
const payload = await submitForm(mockGoogleAuthWithWorkspaceGroups);
const g = payload.config.googleAuthConfig;
expect(g?.fetchGroups).toBe(true);
expect(g?.serviceAccountJson).toBe('{"type": "service_account"}');
expect(g?.fetchTransitiveGroupMembership).toBe(true);
expect(g?.allowedGroups).toStrictEqual([
'allowed-group-1',
'allowed-group-2',
]);
expect(g?.domainToAdminEmail).toStrictEqual({
'google-groups.com': 'admin@google-groups.com',
});
});
});
describe('SAML', () => {
it('sends core and attributeMapping fields', async () => {
const payload = await submitForm(mockSamlWithAttributeMapping);
const s = payload.config.samlConfig;
expect(s?.samlIdp).toBe('https://idp.saml-attrs.com/sso');
expect(s?.samlEntity).toBe('urn:saml-attrs:idp');
expect(s?.samlCert).toBe('MOCK_CERTIFICATE_ATTRS');
expect(s?.insecureSkipAuthNRequestsSigned).toBe(true);
const attr = s?.attributeMapping as Record<string, unknown>;
expect(attr?.name).toBe('user_display_name');
expect(attr?.groups).toBe('member_of');
expect(attr?.role).toBe('signoz_role');
});
});
describe('OIDC', () => {
it('sends all fields including claimMapping', async () => {
const payload = await submitForm(mockOidcWithClaimMapping);
const o = payload.config.oidcConfig;
expect(o?.issuer).toBe('https://oidc.claims.com');
expect(o?.issuerAlias).toBe('https://alias.claims.com');
expect(o?.clientId).toBe('claims-client-id');
expect(o?.clientSecret).toBe('claims-client-secret');
expect(o?.insecureSkipEmailVerified).toBe(true);
expect(o?.getUserInfo).toBe(true);
const claim = o?.claimMapping as Record<string, unknown>;
expect(claim?.email).toBe('user_email');
expect(claim?.name).toBe('display_name');
expect(claim?.groups).toBe('user_groups');
expect(claim?.role).toBe('user_role');
});
});
describe('Role Mapping', () => {
it('strips groupMappings when useRoleAttribute is true', async () => {
const payload = await submitForm({
...mockDomainWithRoleMapping,
config: {
...mockDomainWithRoleMapping.config,
roleMapping: {
...mockDomainWithRoleMapping.config?.roleMapping,
useRoleAttribute: true,
},
},
});
expect(payload.config.roleMapping?.useRoleAttribute).toBe(true);
expect(payload.config.roleMapping?.groupMappings).toBeUndefined();
});
it('sends groupMappings when useRoleAttribute is false', async () => {
const payload = await submitForm(mockDomainWithRoleMapping);
expect(payload.config.roleMapping?.useRoleAttribute).toBe(false);
expect(payload.config.roleMapping?.groupMappings).toStrictEqual({
'admin-group': 'ADMIN',
'dev-team': 'EDITOR',
viewers: 'VIEWER',
});
});
});
});

View File

@@ -67,5 +67,24 @@ func (provider *provider) addTraceDetailRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v3/traces/{traceID}/flamegraph", handler.New(
provider.authzMiddleware.ViewAccess(provider.traceDetailHandler.GetFlamegraph),
handler.OpenAPIDef{
ID: "GetFlamegraph",
Tags: []string{"tracedetail"},
Summary: "Get flamegraph view for a trace",
Description: "Returns the flamegraph view of spans for a given trace ID.",
Request: new(spantypes.PostableFlamegraph),
RequestContentType: "application/json",
Response: new(spantypes.GettableFlamegraphTrace),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusNotFound},
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
},
)).Methods(http.MethodPost).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -6,7 +6,16 @@ import (
)
type Config struct {
Waterfall WaterfallConfig `mapstructure:"waterfall"`
Waterfall WaterfallConfig `mapstructure:"waterfall"`
Flamegraph FlamegraphConfig `mapstructure:"flamegraph"`
}
type FlamegraphConfig struct {
MaxSelectedLevels int `mapstructure:"max_selected_levels"`
MaxSpansPerLevel int `mapstructure:"max_spans_per_level"`
SamplingTopLatencySpansCount int `mapstructure:"sampling_top_latency_count"`
SamplingBucketCount int `mapstructure:"sampling_bucket_count"`
SelectAllSpansLimit uint `mapstructure:"select_all_spans_limit"`
}
type WaterfallConfig struct {
@@ -29,6 +38,13 @@ func newConfig() factory.Config {
MaxDepthToAutoExpand: 5,
MaxLimitToSelectAllSpans: 10_000,
},
Flamegraph: FlamegraphConfig{
MaxSelectedLevels: 50,
MaxSpansPerLevel: 100,
SamplingTopLatencySpansCount: 5,
SamplingBucketCount: 50,
SelectAllSpansLimit: 100_000,
},
}
}
@@ -42,5 +58,20 @@ func (c Config) Validate() error {
if c.Waterfall.MaxLimitToSelectAllSpans == 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "traces.waterfall.max_limit_to_select_all_spans must be positive")
}
if c.Flamegraph.MaxSelectedLevels <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.level_limit must be positive, got %d", c.Flamegraph.MaxSelectedLevels)
}
if c.Flamegraph.MaxSpansPerLevel <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.spans_per_level must be positive, got %d", c.Flamegraph.MaxSpansPerLevel)
}
if c.Flamegraph.SamplingTopLatencySpansCount < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.top_latency_count cannot be negative, got %d", c.Flamegraph.SamplingTopLatencySpansCount)
}
if c.Flamegraph.SamplingBucketCount <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.bucket_count must be positive, got %d", c.Flamegraph.SamplingBucketCount)
}
if c.Flamegraph.SelectAllSpansLimit == 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "tracedetail.flamegraph.max_limit_to_select_all_spans must be positive")
}
return nil
}

View File

@@ -80,3 +80,19 @@ func (h *handler) GetTraceAggregations(rw http.ResponseWriter, r *http.Request)
render.Success(rw, http.StatusOK, result)
}
func (h *handler) GetFlamegraph(rw http.ResponseWriter, r *http.Request) {
req := new(spantypes.PostableFlamegraph)
if err := binding.JSON.BindBody(r.Body, req); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.GetFlamegraph(r.Context(), mux.Vars(r)["traceID"], req.SelectedSpanID, req.SelectFields)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}

View File

@@ -7,6 +7,7 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/tracedetail"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"go.opentelemetry.io/otel/metric"
)
@@ -164,6 +165,17 @@ func (m *module) GetTraceAggregations(ctx context.Context, traceID string, req *
return &spantypes.GettableTraceAggregations{Aggregations: results}, nil
}
func (m *module) GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
summary, err := m.store.GetTraceSummary(ctx, traceID)
if err != nil {
return nil, err
}
if summary.NumSpans <= uint64(m.config.Flamegraph.SelectAllSpansLimit) {
return m.getFullFlamegraph(ctx, traceID, summary, selectFields)
}
return m.getWindowedFlamegraph(ctx, traceID, selectedSpanID, summary, selectFields)
}
// getWindowedWaterfall builds the waterfall tree with minimal data and then returns only a window of full spans.
func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpanID string, uncollapsedSpans []string, start, end time.Time) (*spantypes.GettableWaterfallTrace, error) {
// Step 1: minimal fetch → build full tree → select visible window
@@ -204,3 +216,47 @@ func (m *module) getWindowedWaterfall(ctx context.Context, traceID, selectedSpan
waterfallTrace, selectedSpans, uncollapsedSpans, false, nil,
), nil
}
func (m *module) getFullFlamegraph(ctx context.Context, traceID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, nil)
if err != nil {
return nil, err
}
if len(fullSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
flamegraphTrace := spantypes.NewFlamegraphTraceFromStorable(fullSpans, selectFields)
return spantypes.NewGettableFlamegraphTrace(flamegraphTrace.GetAllLevels(), summary.Start.UnixMilli(), summary.End.UnixMilli(), false), nil
}
// getWindowedFlamegraph returns a window of a max levels and max sampled spans per level around the selected span.
func (m *module) getWindowedFlamegraph(ctx context.Context, traceID, selectedSpanID string, summary *spantypes.TraceSummary, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error) {
minimalSpans, err := m.store.GetMinimalSpans(ctx, traceID, summary.Start, summary.End)
if err != nil {
return nil, err
}
if len(minimalSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
flamegraphTrace := spantypes.NewFlamegraphTraceFromMinimal(minimalSpans)
minimalSpans = nil //nolint:ineffassign,wastedassign // release backing array before further db calls
cfg := m.config.Flamegraph
selectedSpans := flamegraphTrace.GetSelectedLevels(selectedSpanID, cfg.MaxSelectedLevels, cfg.MaxSpansPerLevel, cfg.SamplingTopLatencySpansCount, cfg.SamplingBucketCount)
if len(selectedSpans) == 0 {
return nil, spantypes.ErrTraceNotFound
}
fullSpans, err := m.store.GetFlamegraphSpans(ctx, traceID, summary.Start, summary.End, spantypes.FlamegraphWindowSpanIDs(selectedSpans))
if err != nil {
return nil, err
}
return spantypes.NewGettableFlamegraphTrace(
flamegraphTrace.EnrichSelectedSpans(selectedSpans, fullSpans, selectFields),
summary.Start.UnixMilli(),
summary.End.UnixMilli(),
true,
), nil
}

View File

@@ -154,6 +154,47 @@ func (s *traceStore) GetTraceSpansByIDs(ctx context.Context, traceID string, sta
return spans, nil
}
func (s *traceStore) GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]spantypes.StorableSpan, error) {
sb := sqlbuilder.NewSelectBuilder()
sb.Select(
"span_id",
"any(parent_span_id) AS parent_span_id",
"any(timestamp) AS timestamp",
"any(duration_nano) AS duration_nano",
"any(has_error) AS has_error",
"any(name) AS name",
"any(events) AS events",
"any(attributes_string) AS attributes_string",
"any(attributes_number) AS attributes_number",
"any(attributes_bool) AS attributes_bool",
"any(resources_string) AS resources_string",
)
sb.From(fmt.Sprintf("%s.%s", spantypes.TraceDB, spantypes.TraceTable))
conditions := []string{
sb.E("trace_id", traceID),
sb.GE("ts_bucket_start", start.Unix()-1800),
sb.LE("ts_bucket_start", end.Unix()),
}
if len(spanIDs) > 0 {
ids := make([]any, len(spanIDs))
for i, id := range spanIDs {
ids[i] = id
}
conditions = append(conditions, sb.In("span_id", ids...))
}
sb.Where(conditions...)
sb.GroupBy("span_id")
sb.OrderByAsc("timestamp")
sb.OrderByAsc("name")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
var spans []spantypes.StorableSpan
if err := s.telemetryStore.ClickhouseDB().Select(ctx, &spans, query, args...); err != nil {
return nil, errors.WrapInternalf(err, errors.CodeInternal, "error querying flamegraph spans")
}
return spans, nil
}
func (s *traceStore) GetSpanCountByField(ctx context.Context, traceID string, summary *spantypes.TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error) {
fieldExpr, err := buildFieldExpr(fieldKey)
if err != nil {

View File

@@ -91,6 +91,30 @@ func TestGetSpanCountByField(t *testing.T) {
}
}
func TestGetFlamegraphSpans(t *testing.T) {
baseSQL := "SELECT span_id, any(parent_span_id) AS parent_span_id, any(timestamp) AS timestamp, any(duration_nano) AS duration_nano, any(has_error) AS has_error, any(name) AS name, any(events) AS events, any(attributes_string) AS attributes_string, any(attributes_number) AS attributes_number, any(attributes_bool) AS attributes_bool, any(resources_string) AS resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE trace_id = ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY span_id ORDER BY timestamp ASC, name ASC"
withSpanIDsSQL := "SELECT span_id, any(parent_span_id) AS parent_span_id, any(timestamp) AS timestamp, any(duration_nano) AS duration_nano, any(has_error) AS has_error, any(name) AS name, any(events) AS events, any(attributes_string) AS attributes_string, any(attributes_number) AS attributes_number, any(attributes_bool) AS attributes_bool, any(resources_string) AS resources_string FROM signoz_traces.distributed_signoz_index_v3 WHERE trace_id = ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND span_id IN (?, ?) GROUP BY span_id ORDER BY timestamp ASC, name ASC"
tests := []struct {
name string
spanIDs []string
sql string
}{
{name: "NoSpanIDs_GeneratesBaseSQL", spanIDs: nil, sql: baseSQL},
{name: "WithSpanIDs_GeneratesInClauseSQL", spanIDs: []string{"span-1", "span-2"}, sql: withSpanIDsSQL},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
s := newTestStore(sqlmock.QueryMatcherRegexp)
s.Mock().ExpectSelect(regexp.QuoteMeta(tc.sql)).
WillReturnRows(cmock.NewRows(nil, nil))
_, _ = s.Store().GetFlamegraphSpans(context.Background(), testTraceID, testStart, testEnd, tc.spanIDs)
assert.NoError(t, s.Mock().ExpectationsWereMet())
})
}
}
func TestGetSpanDurationByField(t *testing.T) {
expectedSQL := "WITH all_spans AS (SELECT DISTINCT ON (span_id) resource.`service.name`::String AS field_value, toUnixTimestamp64Nano(timestamp) AS start_ns, start_ns + duration_nano AS end_ns FROM signoz_traces.distributed_signoz_index_v3 WHERE trace_id = ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND notEmpty(field_value) ORDER BY timestamp ASC, name ASC), effective_start AS (SELECT field_value, end_ns, greatest(start_ns, ifNull(max(end_ns) OVER (PARTITION BY field_value ORDER BY start_ns ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), toUInt64(0))) AS effective_start_ns FROM all_spans) SELECT field_value, sum(toUInt64(greatest(end_ns - effective_start_ns, 0))) AS total_ns FROM effective_start GROUP BY field_value"

View File

@@ -5,6 +5,7 @@ import (
"net/http"
"github.com/SigNoz/signoz/pkg/types/spantypes"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// Handler exposes HTTP handlers for trace detail APIs.
@@ -12,6 +13,7 @@ type Handler interface {
GetWaterfall(http.ResponseWriter, *http.Request)
GetWaterfallV4(http.ResponseWriter, *http.Request)
GetTraceAggregations(http.ResponseWriter, *http.Request)
GetFlamegraph(http.ResponseWriter, *http.Request)
}
// Module defines the business logic for trace detail operations.
@@ -19,4 +21,5 @@ type Module interface {
GetWaterfall(ctx context.Context, traceID string, req *spantypes.PostableWaterfall) (*spantypes.GettableWaterfallTrace, error)
GetWaterfallV4(ctx context.Context, traceID string, selectedSpanID string, uncollapsedSpans []string, selectAllLimit uint) (*spantypes.GettableWaterfallTrace, error)
GetTraceAggregations(ctx context.Context, traceID string, req *spantypes.PostableTraceAggregations) (*spantypes.GettableTraceAggregations, error)
GetFlamegraph(ctx context.Context, traceID string, selectedSpanID string, selectFields []telemetrytypes.TelemetryFieldKey) (*spantypes.GettableFlamegraphTrace, error)
}

View File

@@ -0,0 +1,102 @@
package spantypes
import (
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
type FlamegraphSpan struct {
SpanID string `json:"spanId" required:"true"`
ParentSpanID string `json:"parentSpanId" required:"true"`
Timestamp uint64 `json:"timestamp" required:"true"`
DurationNano uint64 `json:"durationNano" required:"true"`
HasError bool `json:"hasError" required:"true"`
Name string `json:"name" required:"true"`
Level int64 `json:"level" required:"true"`
Events []Event `json:"event" required:"true" nullable:"false"`
Attributes map[string]any `json:"attributes" required:"true" nullable:"false"`
Resource map[string]string `json:"resource" required:"true" nullable:"false"`
Children []*FlamegraphSpan `json:"-"` // internal tree use only
}
// FlamegraphLevel groups span IDs at a single level within the selected window.
type FlamegraphLevel struct {
Level int64
SpanIDs []string
}
type PostableFlamegraph struct {
SelectedSpanID string `json:"selectedSpanId"`
SelectFields []telemetrytypes.TelemetryFieldKey `json:"selectFields,omitempty"`
}
// GettableFlamegraphTrace is the response for the v3 flamegraph API.
type GettableFlamegraphTrace struct {
Spans [][]*FlamegraphSpan `json:"spans" required:"true" nullable:"false"`
StartTimestampMillis int64 `json:"startTimestampMillis" required:"true"`
EndTimestampMillis int64 `json:"endTimestampMillis" required:"true"`
HasMore bool `json:"hasMore" required:"true"`
}
func NewGettableFlamegraphTrace(spans [][]*FlamegraphSpan, startMs, endMs int64, hasMore bool) *GettableFlamegraphTrace {
return &GettableFlamegraphTrace{
Spans: spans,
StartTimestampMillis: startMs,
EndTimestampMillis: endMs,
HasMore: hasMore,
}
}
func NewFlamegraphSpanFromStorable(s *StorableSpan, level int64, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphSpan {
span := &FlamegraphSpan{
SpanID: s.SpanID,
ParentSpanID: s.ParentSpanID,
Timestamp: uint64(s.StartTime.UnixNano()),
DurationNano: s.DurationNano,
HasError: s.HasError,
Name: s.Name,
Level: level,
Events: s.UnmarshalledEvents(),
Attributes: make(map[string]any),
Resource: make(map[string]string),
}
if len(selectFields) == 0 {
return span
}
for _, field := range selectFields {
switch field.FieldContext {
case telemetrytypes.FieldContextResource:
if v, ok := s.ResourcesString[field.Name]; ok && v != "" {
span.Resource[field.Name] = v
}
case telemetrytypes.FieldContextAttribute:
if v := s.AttributeValue(field.Name); v != nil {
span.Attributes[field.Name] = v
}
}
}
return span
}
func NewMissingParentFlamegraphSpan(node *FlamegraphSpan) *FlamegraphSpan {
return &FlamegraphSpan{
SpanID: node.ParentSpanID,
Name: "Missing Span",
Timestamp: node.Timestamp,
DurationNano: node.DurationNano,
Events: []Event{},
Children: []*FlamegraphSpan{node},
}
}
// FlamegraphWindowSpanIDs collects all span IDs from a level window into a flat slice.
func FlamegraphWindowSpanIDs(window []FlamegraphLevel) []string {
total := 0
for _, lvl := range window {
total += len(lvl.SpanIDs)
}
ids := make([]string, 0, total)
for _, lvl := range window {
ids = append(ids, lvl.SpanIDs...)
}
return ids
}

View File

@@ -0,0 +1,111 @@
package spantypes
import (
"sort"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// FlamegraphTrace holds the level wise tree built from minimal spans.
type FlamegraphTrace struct {
roots []*FlamegraphSpan
nodeByID map[string]*FlamegraphSpan
startTime uint64
endTime uint64
}
func NewFlamegraphTraceFromMinimal(spans []MinimalSpan) *FlamegraphTrace {
t := &FlamegraphTrace{
nodeByID: make(map[string]*FlamegraphSpan, len(spans)),
}
for i := range spans {
node := spans[i].ToFlamegraphSpan()
t.updateTimeRange(node.Timestamp, node.DurationNano)
t.nodeByID[node.SpanID] = node
}
t.buildSpanTree()
return t
}
func NewFlamegraphTraceFromStorable(spans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) *FlamegraphTrace {
t := &FlamegraphTrace{
nodeByID: make(map[string]*FlamegraphSpan, len(spans)),
}
for i := range spans {
node := NewFlamegraphSpanFromStorable(&spans[i], 0, selectFields) // level is set later by BFS
t.updateTimeRange(node.Timestamp, node.DurationNano)
t.nodeByID[node.SpanID] = node
}
t.buildSpanTree()
return t
}
func (t *FlamegraphTrace) GetAllLevels() [][]*FlamegraphSpan {
return nil
}
// GetSelectedLevels returns the window of levels around selectedSpanID with sampling applied to dense levels.
func (t *FlamegraphTrace) GetSelectedLevels(selectedSpanID string, levelLimit, spansPerLevel, topLatencyCount, bucketCount int) []FlamegraphLevel {
return nil
}
func (t *FlamegraphTrace) EnrichSelectedSpans(selectedSpans []FlamegraphLevel, fullSpans []StorableSpan, selectFields []telemetrytypes.TelemetryFieldKey) [][]*FlamegraphSpan {
fullByID := make(map[string]*StorableSpan, len(fullSpans))
for i := range fullSpans {
fullByID[fullSpans[i].SpanID] = &fullSpans[i]
}
result := make([][]*FlamegraphSpan, len(selectedSpans))
for i, lvl := range selectedSpans {
result[i] = make([]*FlamegraphSpan, 0, len(lvl.SpanIDs))
for _, spanID := range lvl.SpanIDs {
if full, ok := fullByID[spanID]; ok {
result[i] = append(result[i], NewFlamegraphSpanFromStorable(full, lvl.Level, selectFields))
} else if lean, ok := t.nodeByID[spanID]; ok {
result[i] = append(result[i], lean)
}
}
}
return result
}
func (t *FlamegraphTrace) updateTimeRange(timestamp, durationNano uint64) {
if t.startTime == 0 || timestamp < t.startTime {
t.startTime = timestamp
}
if end := timestamp + durationNano; end > t.endTime {
t.endTime = end
}
}
func (t *FlamegraphTrace) buildSpanTree() {
for _, node := range t.nodeByID {
if node.ParentSpanID != "" {
if parent, ok := t.nodeByID[node.ParentSpanID]; ok {
parent.Children = append(parent.Children, node)
} else {
missing := NewMissingParentFlamegraphSpan(node)
t.nodeByID[missing.SpanID] = missing
t.roots = append(t.roots, missing)
}
} else if flamegraphSpanIndex(t.roots, node.SpanID) == -1 {
t.roots = append(t.roots, node)
}
}
sort.Slice(t.roots, func(i, j int) bool {
if t.roots[i].Timestamp == t.roots[j].Timestamp {
return t.roots[i].SpanID < t.roots[j].SpanID
}
return t.roots[i].Timestamp < t.roots[j].Timestamp
})
}
func flamegraphSpanIndex(spans []*FlamegraphSpan, spanID string) int {
for i, s := range spans {
if s.SpanID == spanID {
return i
}
}
return -1
}

View File

@@ -30,6 +30,7 @@ type TraceStore interface {
GetTraceSpans(ctx context.Context, traceID string, summary *TraceSummary) ([]StorableSpan, error)
GetMinimalSpans(ctx context.Context, traceID string, start, end time.Time) ([]MinimalSpan, error)
GetTraceSpansByIDs(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetFlamegraphSpans(ctx context.Context, traceID string, start, end time.Time, spanIDs []string) ([]StorableSpan, error)
GetSpanCountByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)
GetSpanDurationByField(ctx context.Context, traceID string, summary *TraceSummary, fieldKey telemetrytypes.TelemetryFieldKey) (map[string]uint64, error)

View File

@@ -164,6 +164,17 @@ func (item *MinimalSpan) ToWaterfallSpan(traceID string) *WaterfallSpan {
}
}
func (item *MinimalSpan) ToFlamegraphSpan() *FlamegraphSpan {
return &FlamegraphSpan{
SpanID: item.SpanID,
ParentSpanID: item.ParentSpanID,
Timestamp: uint64(item.StartTime.UnixNano()),
DurationNano: item.DurationNano,
HasError: item.HasError,
Children: make([]*FlamegraphSpan, 0),
}
}
// NewMissingWaterfallSpan creates a synthetic placeholder span for a parent that has no recorded data.
func NewMissingWaterfallSpan(spanID, traceID string, timeUnixNano, durationNano uint64) *WaterfallSpan {
return &WaterfallSpan{
@@ -267,6 +278,19 @@ func (ws *WaterfallSpan) getPathToSelectedSpanID(selectedSpanID string) ([]strin
return nil, false
}
func (item *StorableSpan) AttributeValue(name string) any {
if v, ok := item.AttributesString[name]; ok {
return v
}
if v, ok := item.AttributesNumber[name]; ok {
return v
}
if v, ok := item.AttributesBool[name]; ok {
return v
}
return nil
}
func (item *StorableSpan) Attributes() map[string]any {
attributes := make(map[string]any, len(item.AttributesString)+len(item.AttributesNumber)+len(item.AttributesBool))
for k, v := range item.AttributesString {
@@ -296,7 +320,7 @@ func (item *StorableSpan) UnmarshalledEvents() []Event {
func (item *StorableSpan) UnmarshalledRefs() []OtelSpanRef {
refs := []OtelSpanRef{}
if err := json.Unmarshal([]byte(item.References), &refs); err != nil {
return nil // skip malformed values
return []OtelSpanRef{} // skip malformed values
}
return refs
}