Compare commits

..

2 Commits

Author SHA1 Message Date
Nikhil Soni
c4d3bb265d fix(flamegraph): two-pass fetch to avoid loading events for all spans
The previous approach fetched all span data (including events) in a single
query, then returned only a windowed subset (~5000 spans). For traces with
200k spans and 2 events avg, this caused ~440MB of allocation just for
events that were never sent to the client.

New flow (always applied, cache-miss path):
- Pass 1: skeleton query fetches only span_id, parent_span_id, timestamp,
  duration_nano, has_error, service_name, name for ALL spans. No events,
  no attribute maps. Builds the complete BFS tree and selects the response
  window with minimal memory.
- Pass 2: hydration query fetches events (and attribute maps if SelectFields
  is set) only for the span IDs that survive window selection and sampling
  (~5000 max for windowed traces). Uses named parameters with IN @spanIDs.

Memory impact:
- Large traces (200k spans, windowed): ~9x reduction — tree building uses
  ~40MB skeleton instead of ~400MB with events
- Small traces (all spans returned): marginal memory benefit from leaner
  tree-building phase; ~10-50ms extra latency from the second round-trip

Also inlines the trace summary query (previously inside GetSpansForTrace)
so time bounds are shared between both passes without calling the shared
helper.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 00:18:03 +05:30
Nikhil Soni
31d6bfda35 fix(flamegraph): reduce memory footprint - quick wins
- Switch tree-building from links/references to parent_span_id: eliminates
  per-span JSON parsing and removes References from the in-memory struct
  and JSON response
- Add DISTINCT ON (span_id) to the flamegraph SELECT query to avoid
  processing duplicate span rows
- Smart SelectFields column projection: only fetch attribute maps
  (string/number/bool) or resources_string when the corresponding field
  context is actually requested, rather than always fetching all four maps
- Free searchScanResponses after span construction and spanIdToSpanNodeMap
  after BFS traversal so GC can reclaim them before window selection
- Replace References-based tree-building loop with parent_span_id check,
  eliminating the O(n²) ContainsFlamegraphSpan scan for root detection
- Remove unused fields from FlamegraphSpan: TraceID (never read by the
  flamegraph frontend) and References (replaced by ParentSpanID); Children
  kept on struct for BFS but hidden from JSON (json:"-")
- Add ParentSpanID to FlamegraphSpan response (field was already declared
  in the TypeScript type but was never populated by the backend)
- Remove traceId from FlamegraphSpan TypeScript type and test mocks

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 00:13:05 +05:30
14 changed files with 181 additions and 299 deletions

View File

@@ -1,6 +1,6 @@
import { useMemo, useState } from 'react';
import { useQueryClient } from 'react-query';
import { Redirect, useHistory, useLocation } from 'react-router-dom';
import { useHistory, useLocation } from 'react-router-dom';
import { Trash2 } from '@signozhq/icons';
import { Button } from '@signozhq/ui/button';
import { toast } from '@signozhq/ui/sonner';
@@ -26,9 +26,7 @@ import type { AuthzResources } from '../utils';
import ErrorInPlace from 'components/ErrorInPlace/ErrorInPlace';
import ROUTES from 'constants/routes';
import { capitalize } from 'lodash-es';
import { useAppContext } from 'providers/App/App';
import { useErrorModal } from 'providers/ErrorModalProvider';
import { LicenseStatus } from 'types/api/licensesV3/getActive';
import { RoleType } from 'types/roles';
import { handleApiError, toAPIError } from 'utils/errorUtils';
@@ -54,9 +52,8 @@ function RoleDetailsPage(): JSX.Element {
const queryClient = useQueryClient();
const { showErrorModal } = useErrorModal();
const { activeLicense, isFetchingActiveLicense } = useAppContext();
const authzResources: AuthzResources = permissionsType.data;
const authzResources = permissionsType.data as unknown as AuthzResources;
// Extract roleId from URL pathname since useParams doesn't work in nested routing
const roleIdMatch = pathname.match(ROLE_ID_REGEX);
@@ -161,22 +158,6 @@ function RoleDetailsPage(): JSX.Element {
},
});
if (isFetchingActiveLicense) {
return (
<div className="role-details-page">
<Skeleton
active
paragraph={{ rows: 8 }}
className="role-details-skeleton"
/>
</div>
);
}
if (activeLicense?.status !== LicenseStatus.VALID) {
return <Redirect to={ROUTES.ROLES_SETTINGS} />;
}
if (!hasReadPermission && readPerms !== null) {
return <PermissionDeniedFullPage permissionName="role:read" />;
}

View File

@@ -5,7 +5,6 @@ import {
} from 'mocks-server/__mockdata__/roles';
import { server } from 'mocks-server/server';
import { rest } from 'msw';
import { Route, Switch } from 'react-router-dom';
import {
fireEvent,
render,
@@ -16,7 +15,6 @@ import {
} from 'tests/test-utils';
import { useAuthZ } from 'hooks/useAuthZ/useAuthZ';
import {
invalidLicense,
mockUseAuthZDenyAll,
mockUseAuthZGrantAll,
} from 'tests/authz-test-utils';
@@ -232,28 +230,6 @@ describe('RoleDetailsPage', () => {
).resolves.toBeInTheDocument();
});
it('redirects to the roles list when license is not valid', async () => {
render(
<Switch>
<Route path="/settings/roles/:roleId">
<RoleDetailsPage />
</Route>
<Route path="/settings/roles" exact>
<div data-testid="roles-list-redirect-target" />
</Route>
</Switch>,
undefined,
{
initialRoute: `/settings/roles/${CUSTOM_ROLE_ID}`,
appContextOverrides: { activeLicense: invalidLicense },
},
);
await expect(
screen.findByTestId('roles-list-redirect-target'),
).resolves.toBeInTheDocument();
});
describe('permission side panel', () => {
beforeEach(() => {
// Both hooks mocked so data renders synchronously — no React Query scheduler or MSW round-trip.

View File

@@ -11,9 +11,7 @@ import { RoleListPermission } from 'hooks/useAuthZ/permissions/role.permissions'
import { useAuthZ } from 'hooks/useAuthZ/useAuthZ';
import useUrlQuery from 'hooks/useUrlQuery';
import LineClampedText from 'periscope/components/LineClampedText/LineClampedText';
import { useAppContext } from 'providers/App/App';
import { useTimezone } from 'providers/Timezone';
import { LicenseStatus } from 'types/api/licensesV3/getActive';
import { RoleType } from 'types/roles';
import { toAPIError } from 'utils/errorUtils';
@@ -32,9 +30,6 @@ interface RolesListingTableProps {
function RolesListingTable({
searchQuery,
}: RolesListingTableProps): JSX.Element {
const { activeLicense } = useAppContext();
const isValidLicense = activeLicense?.status === LicenseStatus.VALID;
const { permissions: listPerms, isLoading: isAuthZLoading } = useAuthZ([
RoleListPermission,
]);
@@ -208,27 +203,19 @@ function RolesListingTable({
const renderRow = (role: AuthtypesRoleDTO): JSX.Element => (
<div
key={role.id}
className={`roles-table-row${isValidLicense ? ' roles-table-row--clickable' : ''}`}
role={isValidLicense ? 'button' : undefined}
tabIndex={isValidLicense ? 0 : undefined}
onClick={
isValidLicense
? (): void => {
if (role.id) {
navigateToRole(role.id, role.name);
}
}
: undefined
}
onKeyDown={
isValidLicense
? (e): void => {
if ((e.key === 'Enter' || e.key === ' ') && role.id) {
navigateToRole(role.id, role.name);
}
}
: undefined
}
className="roles-table-row roles-table-row--clickable"
role="button"
tabIndex={0}
onClick={(): void => {
if (role.id) {
navigateToRole(role.id, role.name);
}
}}
onKeyDown={(e): void => {
if ((e.key === 'Enter' || e.key === ' ') && role.id) {
navigateToRole(role.id, role.name);
}
}}
>
<div className="roles-table-cell roles-table-cell--name">
{role.name ?? '—'}

View File

@@ -4,8 +4,6 @@ import { Button } from '@signozhq/ui/button';
import { Input } from '@signozhq/ui/input';
import AuthZTooltip from 'components/AuthZTooltip/AuthZTooltip';
import { RoleCreatePermission } from 'hooks/useAuthZ/permissions/role.permissions';
import { useAppContext } from 'providers/App/App';
import { LicenseStatus } from 'types/api/licensesV3/getActive';
import CreateRoleModal from './RolesComponents/CreateRoleModal';
import RolesListingTable from './RolesComponents/RolesListingTable';
@@ -15,8 +13,6 @@ import './RolesSettings.styles.scss';
function RolesSettings(): JSX.Element {
const [searchQuery, setSearchQuery] = useState('');
const [isCreateModalOpen, setIsCreateModalOpen] = useState(false);
const { activeLicense } = useAppContext();
const isValidLicense = activeLicense?.status === LicenseStatus.VALID;
return (
<div className="roles-settings" data-testid="roles-settings">
@@ -42,19 +38,17 @@ function RolesSettings(): JSX.Element {
value={searchQuery}
onChange={(e): void => setSearchQuery(e.target.value)}
/>
{isValidLicense && (
<AuthZTooltip checks={[RoleCreatePermission]}>
<Button
variant="solid"
color="primary"
className="role-settings-toolbar-button"
onClick={(): void => setIsCreateModalOpen(true)}
>
<Plus size={14} />
Custom role
</Button>
</AuthZTooltip>
)}
<AuthZTooltip checks={[RoleCreatePermission]}>
<Button
variant="solid"
color="primary"
className="role-settings-toolbar-button"
onClick={(): void => setIsCreateModalOpen(true)}
>
<Plus size={14} />
Custom role
</Button>
</AuthZTooltip>
</div>
<RolesListingTable searchQuery={searchQuery} />
</div>

View File

@@ -6,7 +6,7 @@ import { server } from 'mocks-server/server';
import { rest } from 'msw';
import { fireEvent, render, screen } from 'tests/test-utils';
import { useAuthZ } from 'hooks/useAuthZ/useAuthZ';
import { invalidLicense, mockUseAuthZGrantAll } from 'tests/authz-test-utils';
import { mockUseAuthZGrantAll } from 'tests/authz-test-utils';
import RolesSettings from '../RolesSettings';
@@ -176,26 +176,6 @@ describe('RolesSettings', () => {
}
});
it('hides the create button and disables row clicks when license is not valid', async () => {
render(<RolesSettings />, undefined, {
appContextOverrides: { activeLicense: invalidLicense },
});
await expect(screen.findByText('signoz-admin')).resolves.toBeInTheDocument();
// Create button must be absent
expect(
screen.queryByRole('button', { name: /custom role/i }),
).not.toBeInTheDocument();
// Rows must not carry the clickable class or button role
const rows = document.querySelectorAll('.roles-table-row');
rows.forEach((row) => {
expect(row).not.toHaveClass('roles-table-row--clickable');
expect(row.getAttribute('role')).not.toBe('button');
});
});
it('handles invalid dates gracefully by showing fallback', async () => {
const invalidRole = {
id: 'edge-0009',

View File

@@ -1,4 +1,5 @@
import type {
CoretypesResourceRefDTO,
CoretypesObjectGroupDTO,
CoretypesTypeDTO,
} from 'api/generated/services/sigNoz.schemas';
@@ -7,7 +8,11 @@ import type {
PermissionConfig,
ResourceDefinition,
} from '../PermissionSidePanel/PermissionSidePanel.types';
import type { AuthzResources } from '../utils';
type AuthzResources = {
resources: CoretypesResourceRefDTO[];
relations: Record<string, string[]>;
};
import { PermissionScope } from '../PermissionSidePanel/PermissionSidePanel.types';
import {
buildConfig,
@@ -36,14 +41,12 @@ jest.mock('../RoleDetails/constants', () => {
const dashboardResource: AuthzResources['resources'][number] = {
kind: 'dashboard',
type: 'metaresource',
allowedVerbs: ['create', 'read', 'update', 'delete', 'list'],
type: 'metaresource' as CoretypesTypeDTO,
};
const alertResource: AuthzResources['resources'][number] = {
kind: 'alert',
type: 'metaresource',
allowedVerbs: ['create', 'read', 'update', 'delete', 'list'],
type: 'metaresource' as CoretypesTypeDTO,
};
const baseAuthzResources: AuthzResources = {
@@ -54,16 +57,6 @@ const baseAuthzResources: AuthzResources = {
},
};
// API payload resource refs — only kind+type, no allowedVerbs (matches CoretypesResourceRefDTO shape)
const dashboardResourceRef = {
kind: 'dashboard',
type: 'metaresource' as CoretypesTypeDTO,
};
const alertResourceRef = {
kind: 'alert',
type: 'metaresource' as CoretypesTypeDTO,
};
const resourceDefs: ResourceDefinition[] = [
{
id: 'metaresource:dashboard',
@@ -114,7 +107,7 @@ describe('buildPatchPayload', () => {
});
expect(result.additions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: [ID_B] },
{ resource: dashboardResource, selectors: [ID_B] },
]);
expect(result.deletions).toBeNull();
});
@@ -149,7 +142,7 @@ describe('buildPatchPayload', () => {
});
expect(result.deletions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: [ID_B] },
{ resource: dashboardResource, selectors: [ID_B] },
]);
expect(result.additions).toBeNull();
});
@@ -214,10 +207,10 @@ describe('buildPatchPayload', () => {
});
expect(result.deletions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: ['*'] },
{ resource: dashboardResource, selectors: ['*'] },
]);
expect(result.additions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: [ID_A, ID_B] },
{ resource: dashboardResource, selectors: [ID_A, ID_B] },
]);
});
@@ -248,7 +241,7 @@ describe('buildPatchPayload', () => {
});
expect(result.deletions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: ['*'] },
{ resource: dashboardResource, selectors: ['*'] },
]);
expect(result.additions).toBeNull();
});
@@ -271,7 +264,7 @@ describe('buildPatchPayload', () => {
});
expect(result.deletions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: ['*'] },
{ resource: dashboardResource, selectors: ['*'] },
]);
expect(result.additions).toBeNull();
});
@@ -294,7 +287,7 @@ describe('buildPatchPayload', () => {
});
expect(result.additions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: ['*'] },
{ resource: dashboardResource, selectors: ['*'] },
]);
expect(result.deletions).toBeNull();
});
@@ -320,7 +313,7 @@ describe('buildPatchPayload', () => {
});
expect(result.deletions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: [ID_A, ID_B] },
{ resource: dashboardResource, selectors: [ID_A, ID_B] },
]);
expect(result.additions).toBeNull();
});
@@ -346,7 +339,7 @@ describe('buildPatchPayload', () => {
});
expect(result.additions).toStrictEqual([
{ resource: dashboardResourceRef, selectors: [ID_A] },
{ resource: dashboardResource, selectors: [ID_A] },
]);
expect(result.deletions).toBeNull();
});
@@ -392,7 +385,7 @@ describe('buildPatchPayload', () => {
});
expect(result.additions).toStrictEqual([
{ resource: alertResourceRef, selectors: [ID_B] },
{ resource: alertResource, selectors: [ID_B] },
]);
expect(result.deletions).toBeNull();
});
@@ -401,7 +394,7 @@ describe('buildPatchPayload', () => {
describe('objectsToPermissionConfig', () => {
it('maps a wildcard selector to ALL scope', () => {
const objects: CoretypesObjectGroupDTO[] = [
{ resource: dashboardResourceRef, selectors: ['*'] },
{ resource: dashboardResource, selectors: ['*'] },
];
const result = objectsToPermissionConfig(objects, resourceDefs);
@@ -414,7 +407,7 @@ describe('objectsToPermissionConfig', () => {
it('maps specific selectors to ONLY_SELECTED scope with the IDs', () => {
const objects: CoretypesObjectGroupDTO[] = [
{ resource: dashboardResourceRef, selectors: [ID_A, ID_B] },
{ resource: dashboardResource, selectors: [ID_A, ID_B] },
];
const result = objectsToPermissionConfig(objects, resourceDefs);
@@ -573,41 +566,4 @@ describe('deriveResourcesForRelation', () => {
deriveResourcesForRelation(baseAuthzResources, 'nonexistent'),
).toHaveLength(0);
});
describe('allowedVerbs filtering', () => {
it('excludes resources whose allowedVerbs does not include the relation', () => {
const authz: AuthzResources = {
resources: [
{
kind: 'dashboard',
type: 'metaresource',
allowedVerbs: ['create', 'read', 'update', 'delete', 'list'],
},
{
kind: 'alert',
type: 'metaresource',
allowedVerbs: ['create', 'read', 'update', 'delete', 'list', 'attach'],
},
],
relations: { attach: ['metaresource'] },
};
const result = deriveResourcesForRelation(authz, 'attach');
expect(result).toHaveLength(1);
expect(result[0].id).toBe('metaresource:alert');
});
it('requires both type-relation match and allowedVerbs — neither condition alone is sufficient', () => {
const authz: AuthzResources = {
resources: [
{ kind: 'dashboard', type: 'metaresource', allowedVerbs: ['read'] },
{ kind: 'role', type: 'role', allowedVerbs: ['create'] },
],
relations: { create: ['metaresource'] },
};
expect(deriveResourcesForRelation(authz, 'create')).toHaveLength(0);
});
});
});

View File

@@ -1,9 +1,8 @@
import React from 'react';
import { Badge } from '@signozhq/ui/badge';
import type {
CoretypesObjectGroupDTO,
CoretypesResourceRefDTO,
CoretypesTypeDTO,
CoretypesObjectGroupDTO,
} from 'api/generated/services/sigNoz.schemas';
import { DATE_TIME_FORMATS } from 'constants/dateTimeFormats';
import { capitalize } from 'lodash-es';
@@ -22,11 +21,7 @@ import {
} from './RoleDetails/constants';
export type AuthzResources = {
resources: ReadonlyArray<{
kind: string;
type: string;
allowedVerbs: readonly string[];
}>;
resources: ReadonlyArray<CoretypesResourceRefDTO>;
relations: Readonly<Record<string, ReadonlyArray<string>>>;
};
@@ -74,9 +69,7 @@ export function deriveResourcesForRelation(
}
const supportedTypes = authzResources.relations[relation] ?? [];
return authzResources.resources
.filter(
(r) => supportedTypes.includes(r.type) && r.allowedVerbs.includes(relation),
)
.filter((r) => supportedTypes.includes(r.type))
.map((r) => ({
id: `${r.type}:${r.kind}`,
kind: r.kind,
@@ -148,7 +141,7 @@ export function buildPatchPayload({
}
const resourceDef: CoretypesResourceRefDTO = {
kind: found.kind,
type: found.type as CoretypesTypeDTO,
type: found.type,
};
const initialScope = initial?.scope ?? PermissionScope.NONE;

View File

@@ -11,7 +11,6 @@ function makeSpan(
): FlamegraphSpan {
return {
parentSpanId: '',
traceId: 'trace-1',
hasError: false,
serviceName: 'svc',
name: 'op',

View File

@@ -6,7 +6,6 @@ export const MOCK_SPAN: FlamegraphSpan = {
durationNano: 50_000_000, // 50ms
spanId: 'span-1',
parentSpanId: '',
traceId: 'trace-1',
hasError: false,
serviceName: 'test-service',
name: 'test-span',

View File

@@ -11,13 +11,6 @@ import type {
} from 'hooks/useAuthZ/types';
import { rest } from 'msw';
import type { RestHandler } from 'msw';
import {
LicenseEvent,
LicensePlatform,
type LicenseResModel,
LicenseState,
LicenseStatus,
} from 'types/api/licensesV3/getActive';
export const AUTHZ_CHECK_URL = `${ENVIRONMENT.baseURL || ''}/api/v1/authz/check`;
@@ -104,40 +97,6 @@ export function setupAuthzAllow(
});
}
export function buildLicense(
overrides?: Partial<LicenseResModel>,
): LicenseResModel {
return {
key: 'test-key',
status: LicenseStatus.VALID,
state: LicenseState.ACTIVATED,
platform: LicensePlatform.CLOUD,
event_queue: {
created_at: '0',
event: LicenseEvent.NO_EVENT,
scheduled_at: '0',
status: '',
updated_at: '0',
},
plan: {
created_at: '0',
description: '',
is_active: true,
name: '',
updated_at: '0',
},
plan_id: '0',
free_until: '0',
updated_at: '0',
valid_from: 0,
valid_until: 0,
created_at: '0',
...overrides,
};
}
export const invalidLicense = buildLicense({ status: LicenseStatus.INVALID });
export function mockUseAuthZGrantAll(
permissions: BrandedPermission[],
_options?: UseAuthZOptions,

View File

@@ -23,7 +23,6 @@ export interface FlamegraphSpan {
durationNano: number;
spanId: string;
parentSpanId: string;
traceId: string;
hasError: boolean;
serviceName: string;
name: string;

View File

@@ -48,7 +48,7 @@ export const routePermission: Record<keyof typeof ROUTES, ROLES[]> = {
HOME: ['ADMIN', 'EDITOR', 'VIEWER'],
ALERTS_NEW: ['ADMIN', 'EDITOR'],
ORG_SETTINGS: ['ADMIN'],
MY_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
MY_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER'],
SERVICE_MAP: ['ADMIN', 'EDITOR', 'VIEWER'],
ALL_CHANNELS: ['ADMIN', 'EDITOR', 'VIEWER'],
INGESTION_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER'],
@@ -72,7 +72,7 @@ export const routePermission: Record<keyof typeof ROUTES, ROLES[]> = {
NOT_FOUND: ['ADMIN', 'VIEWER', 'EDITOR', 'ANONYMOUS'],
PASSWORD_RESET: ['ADMIN', 'EDITOR', 'VIEWER'],
SERVICE_METRICS: ['ADMIN', 'EDITOR', 'VIEWER'],
SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER'],
SIGN_UP: ['ADMIN', 'EDITOR', 'VIEWER'],
TRACES_EXPLORER: ['ADMIN', 'EDITOR', 'VIEWER'],
TRACE: ['ADMIN', 'EDITOR', 'VIEWER'],
@@ -98,10 +98,10 @@ export const routePermission: Record<keyof typeof ROUTES, ROLES[]> = {
GET_STARTED_AZURE_MONITORING: ['ADMIN', 'EDITOR', 'VIEWER'],
WORKSPACE_LOCKED: ['ADMIN', 'EDITOR', 'VIEWER'],
WORKSPACE_SUSPENDED: ['ADMIN', 'EDITOR', 'VIEWER'],
ROLES_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
ROLE_DETAILS: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
ROLES_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER'],
ROLE_DETAILS: ['ADMIN', 'EDITOR', 'VIEWER'],
MEMBERS_SETTINGS: ['ADMIN'],
SERVICE_ACCOUNTS_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
SERVICE_ACCOUNTS_SETTINGS: ['ADMIN', 'EDITOR', 'VIEWER'],
BILLING: ['ADMIN'],
SUPPORT: ['ADMIN', 'EDITOR', 'VIEWER', 'ANONYMOUS'],
SOMETHING_WENT_WRONG: ['ADMIN', 'EDITOR', 'VIEWER'],

View File

@@ -1140,6 +1140,8 @@ func (r *ClickHouseReader) GetFlamegraphSpansForTrace(ctx context.Context, orgID
// map[traceID][level]span
var selectedSpans = [][]*model.FlamegraphSpan{}
var traceRoots []*model.FlamegraphSpan
// time bounds for Pass 1 and Pass 2 (set on cache miss, zero on cache hit)
var tsBucketStart, tsBucketEnd int64
// get the trace tree from cache!
cachedTraceData, err := r.GetFlamegraphSpansForTraceCache(ctx, orgID, traceID)
@@ -1155,62 +1157,59 @@ func (r *ClickHouseReader) GetFlamegraphSpansForTrace(ctx context.Context, orgID
if err != nil {
r.logger.Info("cache miss for getFlamegraphSpansForTrace", "traceID", traceID)
selectCols := "timestamp, duration_nano, span_id, trace_id, has_error, links as references, resource_string_service$$name, name, events"
if len(req.SelectFields) > 0 {
selectCols += ", attributes_string, attributes_number, attributes_bool, resources_string"
// Inline summary query to get time bounds shared by Pass 1 and Pass 2.
var traceSummary model.TraceSummary
summaryQuery := fmt.Sprintf(
"SELECT trace_id, min(start) AS start, max(end) AS end, sum(num_spans) AS num_spans FROM %s.%s WHERE trace_id=$1 GROUP BY trace_id",
r.TraceDB, r.traceSummaryTable)
if summaryErr := r.db.QueryRow(ctx, summaryQuery, traceID).Scan(
&traceSummary.TraceID, &traceSummary.Start, &traceSummary.End, &traceSummary.NumSpans,
); summaryErr != nil {
if summaryErr == sql.ErrNoRows {
return trace, nil
}
r.logger.Error("Error in processing flamegraph trace summary sql query", errorsV2.Attr(summaryErr))
return nil, model.ExecutionError(fmt.Errorf("getFlamegraphSpansForTrace: error querying trace summary: %w", summaryErr))
}
flamegraphQuery := fmt.Sprintf("SELECT %s FROM %s.%s WHERE trace_id=$1 and ts_bucket_start>=$2 and ts_bucket_start<=$3 ORDER BY timestamp ASC, name ASC", selectCols, r.TraceDB, r.traceTableName)
tsBucketStart = traceSummary.Start.Unix() - 1800
tsBucketEnd = traceSummary.End.Unix()
searchScanResponses, err := r.GetSpansForTrace(ctx, traceID, flamegraphQuery)
if err != nil {
return nil, err
// Pass 1: skeleton query — no events, no attribute maps.
// Keeps tree-building memory lean; events are fetched in Pass 2 only for
// the windowed spans that are actually returned in the response.
skeletonQuery := fmt.Sprintf(
"SELECT DISTINCT ON (span_id) timestamp, duration_nano, span_id, parent_span_id, has_error, resource_string_service$$name, name FROM %s.%s WHERE trace_id=$1 AND ts_bucket_start>=$2 AND ts_bucket_start<=$3 ORDER BY timestamp ASC, name ASC",
r.TraceDB, r.traceTableName)
var skeletonSpans []model.SpanItemV2
if skeletonErr := r.db.Select(ctx, &skeletonSpans, skeletonQuery, traceID,
strconv.FormatInt(tsBucketStart, 10), strconv.FormatInt(tsBucketEnd, 10),
); skeletonErr != nil {
r.logger.Error("Error in processing flamegraph skeleton sql query", errorsV2.Attr(skeletonErr))
return nil, model.ExecutionError(fmt.Errorf("getFlamegraphSpansForTrace: error querying skeleton spans: %w", skeletonErr))
}
if len(searchScanResponses) == 0 {
if len(skeletonSpans) == 0 {
return trace, nil
}
for _, item := range searchScanResponses {
ref := []model.OtelSpanRef{}
err := json.Unmarshal([]byte(item.References), &ref)
if err != nil {
r.logger.Error("Error unmarshalling references", errorsV2.Attr(err))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getFlamegraphSpansForTrace: error in unmarshalling references %s", err.Error())
}
events := make([]model.Event, 0)
for _, event := range item.Events {
var eventMap model.Event
err = json.Unmarshal([]byte(event), &eventMap)
if err != nil {
r.logger.Error("Error unmarshalling events", errorsV2.Attr(err))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getFlamegraphSpansForTrace: error in unmarshalling events %s", err.Error())
}
events = append(events, eventMap)
}
for _, item := range skeletonSpans {
jsonItem := model.FlamegraphSpan{
SpanID: item.SpanID,
TraceID: item.TraceID,
ServiceName: item.ServiceName,
Name: item.Name,
DurationNano: item.DurationNano,
HasError: item.HasError,
References: ref,
Events: events,
ParentSpanID: item.ParentSpanId,
Children: make([]*model.FlamegraphSpan, 0),
}
if len(req.SelectFields) > 0 {
jsonItem.SetRequestedFields(item, req.SelectFields)
}
// metadata calculation
startTimeUnixNano := uint64(item.TimeUnixNano.UnixNano())
if startTime == 0 || startTimeUnixNano < startTime {
startTime = startTimeUnixNano
}
if endTime == 0 || (startTimeUnixNano+jsonItem.DurationNano) > endTime {
endTime = (startTimeUnixNano + jsonItem.DurationNano)
endTime = startTimeUnixNano + jsonItem.DurationNano
}
if durationNano == 0 || jsonItem.DurationNano > durationNano {
durationNano = jsonItem.DurationNano
@@ -1219,41 +1218,34 @@ func (r *ClickHouseReader) GetFlamegraphSpansForTrace(ctx context.Context, orgID
jsonItem.TimeUnixNano = uint64(item.TimeUnixNano.UnixNano() / 1000000)
spanIdToSpanNodeMap[jsonItem.SpanID] = &jsonItem
}
skeletonSpans = nil
// traverse through the map and append each node to the children array of the parent node
// and add missing spans
// build parent-child tree using parent_span_id; insert placeholders for missing parents
for _, spanNode := range spanIdToSpanNodeMap {
hasParentSpanNode := false
for _, reference := range spanNode.References {
if reference.RefType == "CHILD_OF" && reference.SpanId != "" {
hasParentSpanNode = true
if parentNode, exists := spanIdToSpanNodeMap[reference.SpanId]; exists {
parentNode.Children = append(parentNode.Children, spanNode)
} else {
// insert the missing spans
missingSpan := model.FlamegraphSpan{
SpanID: reference.SpanId,
TraceID: spanNode.TraceID,
ServiceName: "",
Name: "Missing Span",
TimeUnixNano: spanNode.TimeUnixNano,
DurationNano: spanNode.DurationNano,
HasError: false,
Events: make([]model.Event, 0),
Children: make([]*model.FlamegraphSpan, 0),
}
missingSpan.Children = append(missingSpan.Children, spanNode)
spanIdToSpanNodeMap[missingSpan.SpanID] = &missingSpan
traceRoots = append(traceRoots, &missingSpan)
}
}
}
if !hasParentSpanNode && !tracedetail.ContainsFlamegraphSpan(traceRoots, spanNode) {
if spanNode.ParentSpanID == "" {
traceRoots = append(traceRoots, spanNode)
} else if parentNode, exists := spanIdToSpanNodeMap[spanNode.ParentSpanID]; exists {
parentNode.Children = append(parentNode.Children, spanNode)
} else {
if _, alreadyCreated := spanIdToSpanNodeMap[spanNode.ParentSpanID]; !alreadyCreated {
missingSpan := &model.FlamegraphSpan{
SpanID: spanNode.ParentSpanID,
Name: "Missing Span",
TimeUnixNano: spanNode.TimeUnixNano,
DurationNano: spanNode.DurationNano,
Events: make([]model.Event, 0),
Children: make([]*model.FlamegraphSpan, 0),
}
spanIdToSpanNodeMap[missingSpan.SpanID] = missingSpan
traceRoots = append(traceRoots, missingSpan)
}
spanIdToSpanNodeMap[spanNode.ParentSpanID].Children = append(
spanIdToSpanNodeMap[spanNode.ParentSpanID].Children, spanNode)
}
}
selectedSpans = tracedetail.GetAllSpansForFlamegraph(traceRoots, spanIdToSpanNodeMap)
spanIdToSpanNodeMap = nil
// TODO: set the trace data (model.GetFlamegraphSpansForTraceCache) in cache here
// removed existing cache usage since it was not getting used due to this bug https://github.com/SigNoz/engineering-pod/issues/4648
@@ -1276,6 +1268,74 @@ func (r *ClickHouseReader) GetFlamegraphSpansForTrace(ctx context.Context, orgID
}
r.logger.Debug("getFlamegraphSpansForTrace: processing post cache", "duration", time.Since(processingPostCache), "traceID", traceID, "totalSpans", totalSpanCount, "limit", clientLimit)
// Pass 2: hydrate events and requested attribute fields only for the selected window spans.
// tsBucketStart is non-zero only when we performed a DB fetch (cache miss path).
if err != nil && tsBucketStart != 0 {
needsAttrMaps := false
needsResourceMap := false
for _, f := range req.SelectFields {
if f.FieldContext == telemetrytypes.FieldContextAttribute {
needsAttrMaps = true
}
if f.FieldContext == telemetrytypes.FieldContextResource {
needsResourceMap = true
}
}
selectedSpanIDs := make([]string, 0)
selectedSpanMap := make(map[string]*model.FlamegraphSpan)
for _, level := range selectedSpansForRequest {
for _, span := range level {
selectedSpanIDs = append(selectedSpanIDs, span.SpanID)
selectedSpanMap[span.SpanID] = span
}
}
if len(selectedSpanIDs) > 0 {
hydrateCols := "span_id, events"
if needsAttrMaps {
hydrateCols += ", attributes_string, attributes_number, attributes_bool"
}
if needsResourceMap {
hydrateCols += ", resources_string"
}
hydrateQuery := fmt.Sprintf(
"SELECT %s FROM %s.%s WHERE trace_id=@traceID AND ts_bucket_start>=@tsStart AND ts_bucket_start<=@tsEnd AND span_id IN @spanIDs",
hydrateCols, r.TraceDB, r.traceTableName)
var hydrateRows []model.SpanItemV2
if hydrateErr := r.db.Select(ctx, &hydrateRows, hydrateQuery,
clickhouse.Named("traceID", traceID),
clickhouse.Named("tsStart", tsBucketStart),
clickhouse.Named("tsEnd", tsBucketEnd),
clickhouse.Named("spanIDs", selectedSpanIDs),
); hydrateErr != nil {
r.logger.Error("Error in processing flamegraph hydration sql query", errorsV2.Attr(hydrateErr))
return nil, model.ExecutionError(fmt.Errorf("getFlamegraphSpansForTrace: error querying events: %w", hydrateErr))
}
for _, item := range hydrateRows {
span, ok := selectedSpanMap[item.SpanID]
if !ok {
continue
}
events := make([]model.Event, 0, len(item.Events))
for _, event := range item.Events {
var eventMap model.Event
if unmarshalErr := json.Unmarshal([]byte(event), &eventMap); unmarshalErr != nil {
r.logger.Error("Error unmarshalling events", errorsV2.Attr(unmarshalErr))
return nil, errorsV2.Newf(errorsV2.TypeInternal, errorsV2.CodeInternal, "getFlamegraphSpansForTrace: error in unmarshalling events %s", unmarshalErr.Error())
}
events = append(events, eventMap)
}
span.Events = events
if len(req.SelectFields) > 0 {
span.SetRequestedFields(item, req.SelectFields)
}
}
}
}
trace.Spans = selectedSpansForRequest
trace.StartTimestampMillis = startTime / 1000000
trace.EndTimestampMillis = endTime / 1000000

View File

@@ -297,14 +297,13 @@ type FlamegraphSpan struct {
TimeUnixNano uint64 `json:"timestamp"`
DurationNano uint64 `json:"durationNano"`
SpanID string `json:"spanId"`
TraceID string `json:"traceId"`
HasError bool `json:"hasError"`
ServiceName string `json:"serviceName"`
Name string `json:"name"`
Level int64 `json:"level"`
ParentSpanID string `json:"parentSpanId"`
Events []Event `json:"event"`
References []OtelSpanRef `json:"references,omitempty"`
Children []*FlamegraphSpan `json:"children"`
Children []*FlamegraphSpan `json:"-"`
Attributes map[string]any `json:"attributes,omitempty"`
Resource map[string]string `json:"resource,omitempty"`
}