Compare commits

...

51 Commits

Author SHA1 Message Date
Srikanth Chekuri
0193b4f9bf Merge branch 'main' into feat/services_use_span_metrics 2026-01-23 04:17:43 +05:30
nikhilmantri0902
ce05187ee5 chore: added spanMetricsSingleBucketStep correction 2026-01-19 17:59:54 +05:30
nikhilmantri0902
b4e3b7f641 chore: added selection criteria for MetricScopeFieldDefinitions 2026-01-16 18:27:02 +05:30
nikhilmantri0902
6a6809fef3 chore: remove average latency 2026-01-16 16:35:59 +05:30
nikhilmantri0902
1ffbc3bd4b chore: removed the format table option from one query 2026-01-16 16:21:15 +05:30
nikhilmantri0902
5af8a02fd8 chore: removed unnecessary comment and added no cache true for span metrics 2026-01-16 15:18:45 +05:30
Nikhil Mantri
034599bbeb Merge branch 'main' into feat/services_use_span_metrics 2026-01-16 13:09:25 +05:30
Srikanth Chekuri
0e731df10a Merge branch 'main' into feat/services_use_span_metrics 2026-01-15 00:49:09 +05:30
Nikhil Mantri
43e5a3d96c Merge branch 'main' into feat/services_use_span_metrics 2026-01-13 15:23:46 +05:30
Srikanth Chekuri
17ff4a1841 Merge branch 'main' into feat/services_use_span_metrics 2026-01-09 23:59:01 +05:30
nikhilmantri0902
5c61aa41f6 chore: openapi fix 2026-01-08 14:56:05 +05:30
Srikanth Chekuri
b514ce5b31 Merge branch 'main' into feat/services_use_span_metrics 2026-01-08 02:04:30 +05:30
nikhilmantri0902
5ffce5de13 chore: small corrections 2026-01-07 17:17:54 +05:30
nikhilmantri0902
cd3838dfb7 chore: periodSeconds calculations correction 2026-01-07 17:15:24 +05:30
nikhilmantri0902
01071cd3e7 chore: small corrections 2026-01-07 17:11:04 +05:30
nikhilmantri0902
1feee4a49f chore: removed unused method 2026-01-07 17:04:27 +05:30
nikhilmantri0902
5d067ed413 chore: added flagger depdendency to services module 2026-01-07 16:55:18 +05:30
nikhilmantri0902
4fc00d2a72 chore: resolved conflicts 2026-01-07 16:24:21 +05:30
nikhilmantri0902
7d1af84a06 chore: removed unnecessary comment 2025-12-31 15:25:23 +05:30
Nikhil Mantri
683d2eebd4 Merge branch 'main' into feat/services_use_span_metrics 2025-12-31 15:15:04 +05:30
nikhilmantri0902
82419a64f8 chore: added large step interval for pxx computation 2025-12-31 13:32:03 +05:30
nikhilmantri0902
3aa5c53ca4 chore: services rate calculation logic change 2025-12-30 16:44:38 +05:30
nikhilmantri0902
608e5d64fe chore: todo added for is averaging the best way to calculate p99 2025-12-30 14:39:00 +05:30
nikhilmantri0902
629b93cc14 chore: service metrics powering homepage component 2025-12-30 12:53:29 +05:30
nikhilmantri0902
3e97c41799 chore: removed unused func 2025-12-29 21:23:47 +05:30
nikhilmantri0902
f775a03d2c chore: removed fmt.Prints 2025-12-29 21:09:32 +05:30
nikhilmantri0902
892ef538bc chore: frontend latency graph unit fix 2025-12-29 20:33:21 +05:30
nikhilmantri0902
5c08458f48 chore: fix for calls and error in key operations table 2025-12-29 20:07:36 +05:30
nikhilmantri0902
e671e9d647 chore: methods rearrangement 2025-12-29 19:18:13 +05:30
nikhilmantri0902
1013cb727c chore: logic change 2025-12-29 17:29:15 +05:30
nikhilmantri0902
932a0c4a83 chore: env variable initialization in the module itself 2025-12-29 16:25:44 +05:30
nikhilmantri0902
426964d489 chore: replace rate -> increase for span metrics num errors and num calls 2025-12-29 15:56:31 +05:30
nikhilmantri0902
c66ad5738e chore: use span metrics 2025-12-28 10:56:20 +05:30
nikhilmantri0902
634e8923c7 chore: add call rate fix 2025-12-27 13:39:02 +05:30
nikhilmantri0902
69855a1136 chore: rate->increase in top ops and entry point ops 2025-12-26 16:09:05 +05:30
nikhilmantri0902
cc1513c7e3 chore: top Ops and entry point calculation fix 2025-12-26 15:33:33 +05:30
nikhilmantri0902
ba60d9ee50 chore: removed method 2025-12-26 12:38:04 +05:30
nikhilmantri0902
261622616f chore: removed log 2025-12-26 12:36:26 +05:30
nikhilmantri0902
592f5a7e11 chore: errorFilterExpr fix 2025-12-26 12:20:51 +05:30
nikhilmantri0902
dc390c813d chore: rate fix 2025-12-26 11:58:03 +05:30
nikhilmantri0902
ba6690cddb chore: lint fix 2025-12-26 11:20:02 +05:30
nikhilmantri0902
3a45ae12d1 chore: frontend change 2025-12-25 19:03:46 +05:30
nikhilmantri0902
5aef65fc11 chore: backend code modification for top_operations and entrypoint_operations 2025-12-25 18:53:41 +05:30
nikhilmantri0902
efd2d961d0 chore: redesign 2025-12-25 17:52:14 +05:30
nikhilmantri0902
ec618a00ce chore: redesign 2025-12-25 17:51:46 +05:30
nikhilmantri0902
c896753d3a chore: latency to nano seconds 2025-12-25 17:43:10 +05:30
nikhilmantri0902
cc616602eb chore: fix 2025-12-25 11:25:03 +05:30
nikhilmantri0902
255847ac61 chore: added mapping functions to handle span metrics 2025-12-24 19:33:09 +05:30
nikhilmantri0902
a1e4461865 chore: isMetricScopeField method + buildMetricScopeCondition method 2025-12-24 14:48:20 +05:30
nikhilmantri0902
146fd9892b chore: isMetricScopeField method + buildMetricScopeCondition method 2025-12-24 14:43:59 +05:30
nikhilmantri0902
b9ecdcf210 chore: added selector for isTopLevelOperation 2025-12-24 14:26:49 +05:30
18 changed files with 959 additions and 438 deletions

View File

@@ -101,6 +101,7 @@ go-test: ## Runs go unit tests
.PHONY: go-run-community
go-run-community: ## Runs the community go backend server
@SIGNOZ_INSTRUMENTATION_LOGS_LEVEL=debug \
SIGNOZ_FLAGGER_CONFIG_BOOLEAN_USE__SPAN__METRICS=true \
SIGNOZ_SQLSTORE_SQLITE_PATH=signoz.db \
SIGNOZ_WEB_ENABLED=false \
SIGNOZ_TOKENIZER_JWT_SECRET=secret \

View File

@@ -1,13 +1,7 @@
import { Button, Select, Skeleton, Table } from 'antd';
import logEvent from 'api/common/logEvent';
import { ENTITY_VERSION_V4 } from 'constants/app';
import ROUTES from 'constants/routes';
import {
getQueryRangeRequestData,
getServiceListFromQuery,
} from 'container/ServiceApplication/utils';
import { useGetQueriesRange } from 'hooks/queryBuilder/useGetQueriesRange';
import useGetTopLevelOperations from 'hooks/useGetTopLevelOperations';
import { useQueryService } from 'hooks/useQueryService';
import useResourceAttribute from 'hooks/useResourceAttribute';
import { convertRawQueriesToTraceSelectedTags } from 'hooks/useResourceAttribute/utils';
import { useSafeNavigate } from 'hooks/useSafeNavigate';
@@ -17,7 +11,6 @@ import Card from 'periscope/components/Card/Card';
import { useAppContext } from 'providers/App/App';
import { IUser } from 'providers/App/types';
import { memo, useCallback, useEffect, useMemo, useState } from 'react';
import { QueryKey } from 'react-query';
import { useSelector } from 'react-redux';
import { Link } from 'react-router-dom';
import { AppState } from 'store/reducers';
@@ -30,7 +23,6 @@ import { GlobalReducer } from 'types/reducer/globalTime';
import { Tags } from 'types/reducer/trace';
import { USER_ROLES } from 'types/roles';
import { FeatureKeys } from '../../../constants/features';
import { DOCS_LINKS } from '../constants';
import { columns, TIME_PICKER_OPTIONS } from './constants';
@@ -166,32 +158,6 @@ function ServiceMetrics({
[queries],
);
const [isError, setIsError] = useState(false);
const queryKey: QueryKey = useMemo(
() => [
timeRange.startTime,
timeRange.endTime,
selectedTags,
globalSelectedInterval,
],
[
timeRange.startTime,
timeRange.endTime,
selectedTags,
globalSelectedInterval,
],
);
const {
data,
isLoading: isLoadingTopLevelOperations,
isError: isErrorTopLevelOperations,
} = useGetTopLevelOperations(queryKey, {
start: timeRange.startTime * 1e6,
end: timeRange.endTime * 1e6,
});
const handleTimeIntervalChange = useCallback((value: number): void => {
const timeInterval = TIME_PICKER_OPTIONS.find(
(option) => option.value === value,
@@ -209,66 +175,29 @@ function ServiceMetrics({
});
}, []);
const topLevelOperations = useMemo(() => Object.entries(data || {}), [data]);
const { featureFlags } = useAppContext();
const dotMetricsEnabled =
featureFlags?.find((flag) => flag.name === FeatureKeys.DOT_METRICS_ENABLED)
?.active || false;
const queryRangeRequestData = useMemo(
() =>
getQueryRangeRequestData({
topLevelOperations,
globalSelectedInterval,
dotMetricsEnabled,
}),
[globalSelectedInterval, topLevelOperations, dotMetricsEnabled],
);
const dataQueries = useGetQueriesRange(
queryRangeRequestData,
ENTITY_VERSION_V4,
{
queryKey: useMemo(
() => [
`GetMetricsQueryRange-home-${globalSelectedInterval}`,
timeRange.endTime,
timeRange.startTime,
globalSelectedInterval,
],
[globalSelectedInterval, timeRange.endTime, timeRange.startTime],
),
// Fetch services data from /api/v2/services
const { data: servicesData, isLoading, isError } = useQueryService({
minTime: timeRange.startTime * 1e6, // Convert ms to nanoseconds
maxTime: timeRange.endTime * 1e6, // Convert ms to nanoseconds
selectedTime: globalSelectedInterval,
selectedTags,
options: {
keepPreviousData: true,
enabled: true,
refetchOnMount: false,
onError: () => {
setIsError(true);
},
},
);
});
const isLoading = useMemo(() => dataQueries.some((query) => query.isLoading), [
dataQueries,
const services: ServicesList[] = useMemo(() => servicesData || [], [
servicesData,
]);
const services: ServicesList[] = useMemo(
() =>
getServiceListFromQuery({
queries: dataQueries,
topLevelOperations,
isLoading,
}),
[dataQueries, topLevelOperations, isLoading],
);
const sortedServices = useMemo(
() =>
services?.sort((a, b) => {
const aUpdateAt = new Date(a.p99).getTime();
const bUpdateAt = new Date(b.p99).getTime();
return bUpdateAt - aUpdateAt;
}) || [],
services?.sort(
(a, b) =>
// p99 is already a number (nanoseconds), sort descending
b.p99 - a.p99,
) || [],
[services],
);
@@ -293,7 +222,7 @@ function ServiceMetrics({
[safeNavigate],
);
if (isLoadingTopLevelOperations || isLoading) {
if (isLoading) {
return (
<Card className="services-list-card home-data-card loading-card">
<Card.Content>
@@ -303,7 +232,7 @@ function ServiceMetrics({
);
}
if (isErrorTopLevelOperations || isError) {
if (isError) {
return (
<Card className="services-list-card home-data-card error-card">
<Card.Content>

View File

@@ -1,155 +0,0 @@
import { OPERATORS } from 'constants/queryBuilder';
import {
BaseAutocompleteData,
DataTypes,
} from 'types/api/queryBuilder/queryAutocompleteResponse';
import { TagFilterItem } from 'types/api/queryBuilder/queryBuilderData';
import {
DataSource,
MetricAggregateOperator,
QueryBuilderData,
} from 'types/common/queryBuilder';
import {
GraphTitle,
KeyOperationTableHeader,
MetricsType,
WidgetKeys,
} from '../constant';
import { TopOperationQueryFactoryProps } from '../Tabs/types';
import { getQueryBuilderQuerieswithFormula } from './MetricsPageQueriesFactory';
/**
 * Builds the query-builder payload for the "top operations" table of a single
 * service, sourced from metrics (DataSource.METRICS).
 *
 * Six builder queries are assembled from parallel arrays (autocompleteData,
 * additionalItems, disabled, legends, timeAggregateOperators,
 * spaceAggregateOperators) that must stay index-aligned, plus one formula
 * expression. All queries are grouped by WidgetKeys.Operation.
 *
 * @param servicename - service whose operations are queried; used as the
 *   value of the service-name filter on every query.
 * @param dotMetricsEnabled - selects between the regular and the `*Norm` /
 *   `*_norm` WidgetKeys variants for metric and attribute names.
 * @returns the QueryBuilderData produced by getQueryBuilderQuerieswithFormula.
 */
export const topOperationQueries = ({
servicename,
dotMetricsEnabled,
}: TopOperationQueryFactoryProps): QueryBuilderData => {
// Metric used by the three percentile (P50/P90/P99) queries.
const latencyAutoCompleteData: BaseAutocompleteData = {
key: dotMetricsEnabled
? WidgetKeys.Signoz_latency_bucket
: WidgetKeys.Signoz_latency_bucket_norm,
dataType: DataTypes.Float64,
type: '',
};
// Metric used by both error-rate input queries.
const errorRateAutoCompleteData: BaseAutocompleteData = {
key: WidgetKeys.SignozCallsTotal,
dataType: DataTypes.Float64,
type: '',
};
// Metric used by the number-of-calls query.
const numOfCallAutoCompleteData: BaseAutocompleteData = {
key: dotMetricsEnabled
? WidgetKeys.SignozLatencyCount
: WidgetKeys.SignozLatencyCountNorm,
dataType: DataTypes.Float64,
type: '',
};
// Filter: service name IN [servicename].
const latencyAndNumberOfCallAdditionalItems: TagFilterItem[] = [
{
id: '',
key: {
key: dotMetricsEnabled
? WidgetKeys.Service_name
: WidgetKeys.Service_name_norm,
dataType: DataTypes.String,
type: MetricsType.Resource,
},
value: [servicename],
op: OPERATORS.IN,
},
];
// Filter for the first error-rate input: same service filter plus
// status code IN ['STATUS_CODE_ERROR'].
const errorRateAdditionalItemsA: TagFilterItem[] = [
{
id: '',
key: {
dataType: DataTypes.String,
key: dotMetricsEnabled
? WidgetKeys.Service_name
: WidgetKeys.Service_name_norm,
type: MetricsType.Resource,
},
op: OPERATORS.IN,
value: [servicename],
},
{
id: '',
key: {
dataType: DataTypes.Int64,
key: dotMetricsEnabled ? WidgetKeys.StatusCode : WidgetKeys.StatusCodeNorm,
type: MetricsType.Tag,
},
op: OPERATORS.IN,
value: ['STATUS_CODE_ERROR'],
},
];
// The second error-rate input uses only the service filter (no status filter).
const errorRateAdditionalItemsB = latencyAndNumberOfCallAdditionalItems;
const groupBy: BaseAutocompleteData[] = [
{
dataType: DataTypes.String,
key: WidgetKeys.Operation,
type: MetricsType.Tag,
},
];
// The arrays below are positional: index i of each array describes query i.
const autocompleteData = [
latencyAutoCompleteData,
latencyAutoCompleteData,
latencyAutoCompleteData,
errorRateAutoCompleteData,
errorRateAutoCompleteData,
numOfCallAutoCompleteData,
];
const additionalItems = [
latencyAndNumberOfCallAdditionalItems,
latencyAndNumberOfCallAdditionalItems,
latencyAndNumberOfCallAdditionalItems,
errorRateAdditionalItemsA,
errorRateAdditionalItemsB,
latencyAndNumberOfCallAdditionalItems,
];
// The two error-rate input queries (4th and 5th) are marked disabled.
const disabled = [false, false, false, true, true, false];
const legends = [
KeyOperationTableHeader.P50,
KeyOperationTableHeader.P90,
KeyOperationTableHeader.P99,
KeyOperationTableHeader.ERROR_RATE,
KeyOperationTableHeader.ERROR_RATE,
KeyOperationTableHeader.NUM_OF_CALLS,
];
const timeAggregateOperators = [
MetricAggregateOperator.EMPTY,
MetricAggregateOperator.EMPTY,
MetricAggregateOperator.EMPTY,
MetricAggregateOperator.RATE,
MetricAggregateOperator.RATE,
MetricAggregateOperator.RATE,
];
const spaceAggregateOperators = [
MetricAggregateOperator.P50,
MetricAggregateOperator.P90,
MetricAggregateOperator.P99,
MetricAggregateOperator.SUM,
MetricAggregateOperator.SUM,
MetricAggregateOperator.SUM,
];
// NOTE(review): D and E presumably name the 4th and 5th queries (errors and
// total calls) — confirm against getQueryBuilderQuerieswithFormula.
const expressions = ['D*100/E'];
const legendFormulas = [GraphTitle.ERROR_PERCENTAGE];
const dataSource = DataSource.METRICS;
return getQueryBuilderQuerieswithFormula({
autocompleteData,
additionalItems,
disabled,
legends,
timeAggregateOperators,
spaceAggregateOperators,
expressions,
legendFormulas,
dataSource,
groupBy,
});
};

View File

@@ -46,7 +46,6 @@ import GraphControlsPanel from './Overview/GraphControlsPanel/GraphControlsPanel
import ServiceOverview from './Overview/ServiceOverview';
import TopLevelOperation from './Overview/TopLevelOperations';
import TopOperation from './Overview/TopOperation';
import TopOperationMetrics from './Overview/TopOperationMetrics';
import { Button, Card } from './styles';
import { IServiceName } from './types';
import {
@@ -72,10 +71,6 @@ function Application(): JSX.Element {
const urlQuery = useUrlQuery();
const { featureFlags } = useAppContext();
const isSpanMetricEnabled =
featureFlags?.find((flag) => flag.name === FeatureKeys.USE_SPAN_METRICS)
?.active || false;
const handleSetTimeStamp = useCallback((selectTime: number) => {
setSelectedTimeStamp(selectTime);
}, []);
@@ -396,7 +391,7 @@ function Application(): JSX.Element {
<Col span={12}>
<Card>
{isSpanMetricEnabled ? <TopOperationMetrics /> : <TopOperation />}{' '}
<TopOperation />
</Card>
</Col>
</Row>

View File

@@ -78,7 +78,7 @@ function ServiceOverview({
},
title: GraphTitle.LATENCY,
panelTypes: PANEL_TYPES.TIME_SERIES,
yAxisUnit: 'ns',
yAxisUnit: isSpanMetricEnable ? 'ms' : 'ns',
id: SERVICE_CHART_ID.latency,
}),
[

View File

@@ -1,130 +0,0 @@
import { ENTITY_VERSION_V4 } from 'constants/app';
import { PANEL_TYPES } from 'constants/queryBuilder';
import { topOperationMetricsDownloadOptions } from 'container/MetricsApplication/constant';
import { getWidgetQueryBuilder } from 'container/MetricsApplication/MetricsApplication.factory';
import { topOperationQueries } from 'container/MetricsApplication/MetricsPageQueries/TopOperationQueries';
import { QueryTable } from 'container/QueryTable';
import { useGetQueryRange } from 'hooks/queryBuilder/useGetQueryRange';
import { updateStepInterval } from 'hooks/queryBuilder/useStepInterval';
import { useNotifications } from 'hooks/useNotifications';
import useResourceAttribute from 'hooks/useResourceAttribute';
import { convertRawQueriesToTraceSelectedTags } from 'hooks/useResourceAttribute/utils';
import { RowData } from 'lib/query/createTableColumnsFromQuery';
import { ReactNode, useMemo } from 'react';
import { useSelector } from 'react-redux';
import { useParams } from 'react-router-dom';
import { AppState } from 'store/reducers';
import { EQueryType } from 'types/common/dashboard';
import { GlobalReducer } from 'types/reducer/globalTime';
import { v4 as uuid } from 'uuid';
import { FeatureKeys } from '../../../../constants/features';
import { useAppContext } from '../../../../providers/App/App';
import { IServiceName } from '../types';
import { title } from './config';
import ColumnWithLink from './TableRenderer/ColumnWithLink';
import { getTableColumnRenderer } from './TableRenderer/TableColumnRenderer';
/**
 * Table of a service's operations backed by the metrics query built by
 * topOperationQueries.
 *
 * The service name comes from the route params, the time window from the
 * global time state, and the result is rendered through QueryTable with the
 * 'operation' column rendered by ColumnWithLink.
 */
function TopOperationMetrics(): JSX.Element {
// The route param is URL-encoded; decode it before using it in queries/links.
const { servicename: encodedServiceName } = useParams<IServiceName>();
const servicename = decodeURIComponent(encodedServiceName);
const { notifications } = useNotifications();
const { minTime, maxTime, selectedTime: globalSelectedInterval } = useSelector<
AppState,
GlobalReducer
>((state) => state.globalTime);
const { queries } = useResourceAttribute();
// Serialized once here and passed to ColumnWithLink for each row.
const selectedTraceTags = JSON.stringify(
convertRawQueriesToTraceSelectedTags(queries) || [],
);
const { featureFlags } = useAppContext();
// Defaults to false when the flag is missing or feature flags are not loaded.
const dotMetricsEnabled =
featureFlags?.find((flag) => flag.name === FeatureKeys.DOT_METRICS_ENABLED)
?.active || false;
// Rebuilt (with a fresh uuid) only when the service or the flag changes.
const keyOperationWidget = useMemo(
() =>
getWidgetQueryBuilder({
query: {
queryType: EQueryType.QUERY_BUILDER,
promql: [],
builder: topOperationQueries({
servicename,
dotMetricsEnabled,
}),
clickhouse_sql: [],
id: uuid(),
},
panelTypes: PANEL_TYPES.TABLE,
}),
[servicename, dotMetricsEnabled],
);
const updatedQuery = updateStepInterval(keyOperationWidget.query);
// The query is not fired for an empty widget (see `enabled` below).
const isEmptyWidget = keyOperationWidget.id === PANEL_TYPES.EMPTY_WIDGET;
const { data, isLoading } = useGetQueryRange(
{
selectedTime: keyOperationWidget?.timePreferance,
graphType: keyOperationWidget?.panelTypes,
query: updatedQuery,
globalSelectedInterval,
variables: {},
},
ENTITY_VERSION_V4,
{
queryKey: [
`GetMetricsQueryRange-${keyOperationWidget?.timePreferance}-${globalSelectedInterval}-${keyOperationWidget?.id}`,
keyOperationWidget,
maxTime,
minTime,
globalSelectedInterval,
],
keepPreviousData: true,
enabled: !isEmptyWidget,
refetchOnMount: false,
// Surface query failures to the user as an error notification.
onError: (error) => {
notifications.error({ message: error.message });
},
},
);
// Falls back to an empty list while loading or when the payload is absent.
const queryTableData = data?.payload?.data?.newResult?.data?.result || [];
// Custom renderer for the 'operation' column; memoized on the values
// passed through to ColumnWithLink.
const renderColumnCell = useMemo(
() =>
getTableColumnRenderer({
columnName: 'operation',
renderFunction: (record: RowData): ReactNode => (
<ColumnWithLink
servicename={servicename}
minTime={minTime}
maxTime={maxTime}
selectedTraceTags={selectedTraceTags}
record={record}
/>
),
}),
[servicename, minTime, maxTime, selectedTraceTags],
);
return (
<QueryTable
title={title}
query={updatedQuery}
queryTableData={queryTableData}
loading={isLoading}
renderColumnCell={renderColumnCell}
downloadOption={topOperationMetricsDownloadOptions}
sticky
/>
);
}
export default TopOperationMetrics;

View File

@@ -1,6 +1,5 @@
/* eslint-disable sonarjs/no-duplicate-string */
import { DownloadOptions } from 'container/Download/Download.types';
import { MenuItemKeys } from 'container/GridCardLayout/WidgetHeader/contants';
import {
MetricAggregateOperator,
@@ -107,11 +106,6 @@ export enum WidgetKeys {
Db_system_norm = 'db_system',
}
export const topOperationMetricsDownloadOptions: DownloadOptions = {
isDownloadEnabled: true,
fileName: 'top-operation',
} as const;
export const SERVICE_CHART_ID = {
latency: 'SERVICE_OVERVIEW_LATENCY',
error: 'SERVICE_OVERVIEW_ERROR',

View File

@@ -1,22 +1,14 @@
import * as Sentry from '@sentry/react';
import { FeatureKeys } from 'constants/features';
import ErrorBoundaryFallback from 'pages/ErrorBoundaryFallback/ErrorBoundaryFallback';
import { useAppContext } from 'providers/App/App';
import ServiceMetrics from './ServiceMetrics';
import ServiceTraces from './ServiceTraces';
import { Container } from './styles';
function Services(): JSX.Element {
const { featureFlags } = useAppContext();
const isSpanMetricEnabled =
featureFlags?.find((flag) => flag.name === FeatureKeys.USE_SPAN_METRICS)
?.active || false;
return (
<Sentry.ErrorBoundary fallback={<ErrorBoundaryFallback />}>
<Container style={{ marginTop: 0 }}>
{isSpanMetricEnabled ? <ServiceMetrics /> : <ServiceTraces />}
<ServiceTraces />
</Container>
</Sentry.ErrorBoundary>
);

View File

@@ -3,12 +3,26 @@ package implservices
import (
"fmt"
"strings"
"time"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/servicetypes/servicetypesv1"
)
// spanMetricsSingleBucketStep returns a step interval that spans the whole
// [startMs, endMs] window (both in milliseconds) so that pxx queries are
// evaluated in a single bucket. The step is never shorter than one second;
// an empty or inverted window also yields one second.
func spanMetricsSingleBucketStep(startMs, endMs uint64) qbtypes.Step {
	// Start from the one-second floor and widen it only for a valid window
	// that is at least that long.
	step := time.Second
	if endMs > startMs {
		if window := time.Duration(endMs-startMs) * time.Millisecond; window >= time.Second {
			step = window
		}
	}
	return qbtypes.Step{Duration: step}
}
// validateTagFilterItems validates the tag filter items. This should be used before using
// buildFilterExpression or any other function that uses tag filter items.
func validateTagFilterItems(tags []servicetypesv1.TagFilterItem) error {

View File

@@ -3,16 +3,20 @@ package implservices
import (
"context"
"fmt"
"time"
"math"
"sort"
"strconv"
"time"
"github.com/ClickHouse/clickhouse-go/v2"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/flagger"
"github.com/SigNoz/signoz/pkg/modules/services"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/telemetrytraces"
"github.com/SigNoz/signoz/pkg/types/featuretypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/servicetypes/servicetypesv1"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
@@ -22,13 +26,15 @@ import (
// module is the concrete services module constructed by NewModule; it bundles
// the dependencies the service-listing and operation queries rely on.
type module struct {
// Querier runs QBv5 query-range requests (used by executeQuery).
Querier querier.Querier
// TelemetryStore is the telemetry storage dependency supplied to NewModule.
TelemetryStore telemetrystore.TelemetryStore
// Flagger evaluates feature flags; it is consulted for
// flagger.FeatureUseSpanMetrics to choose between span-metrics and
// trace-based queries.
Flagger flagger.Flagger
}
// NewModule constructs the services module with the provided querier dependency.
func NewModule(q querier.Querier, ts telemetrystore.TelemetryStore) services.Module {
func NewModule(q querier.Querier, ts telemetrystore.TelemetryStore, flagger flagger.Flagger) services.Module {
return &module{
Querier: q,
TelemetryStore: ts,
Flagger: flagger,
}
}
@@ -74,10 +80,25 @@ func (m *module) Get(ctx context.Context, orgUUID valuer.UUID, req *servicetypes
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
// Prepare phase
queryRangeReq, startMs, endMs, err := m.buildQueryRangeRequest(req)
if err != nil {
return nil, err
var (
startMs uint64
endMs uint64
err error
queryRangeReq *qbtypes.QueryRangeRequest
)
evalCtx := featuretypes.NewFlaggerEvaluationContext(orgUUID)
useSpanMetrics := m.Flagger.BooleanOrEmpty(ctx, flagger.FeatureUseSpanMetrics, evalCtx)
if useSpanMetrics {
queryRangeReq, startMs, endMs, err = m.buildSpanMetricsQueryRangeRequest(req)
if err != nil {
return nil, err
}
} else {
queryRangeReq, startMs, endMs, err = m.buildQueryRangeRequest(req)
if err != nil {
return nil, err
}
}
// Fetch phase
@@ -87,7 +108,14 @@ func (m *module) Get(ctx context.Context, orgUUID valuer.UUID, req *servicetypes
}
// Process phase
items, serviceNames := m.mapQueryRangeRespToServices(resp, startMs, endMs)
var items []*servicetypesv1.ResponseItem
var serviceNames []string
if useSpanMetrics {
items, serviceNames = m.mapSpanMetricsRespToServices(resp, startMs, endMs)
} else {
items, serviceNames = m.mapQueryRangeRespToServices(resp, startMs, endMs)
}
if len(items) == 0 {
return []*servicetypesv1.ResponseItem{}, nil
}
@@ -108,9 +136,23 @@ func (m *module) GetTopOperations(ctx context.Context, orgUUID valuer.UUID, req
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
qr, err := m.buildTopOpsQueryRangeRequest(req)
if err != nil {
return nil, err
var (
qr *qbtypes.QueryRangeRequest
err error
)
evalCtx := featuretypes.NewFlaggerEvaluationContext(orgUUID)
useSpanMetrics := m.Flagger.BooleanOrEmpty(ctx, flagger.FeatureUseSpanMetrics, evalCtx)
if useSpanMetrics {
qr, err = m.buildSpanMetricsTopOpsQueryRangeRequest(req)
if err != nil {
return nil, err
}
} else {
qr, err = m.buildTopOpsQueryRangeRequest(req)
if err != nil {
return nil, err
}
}
resp, err := m.executeQuery(ctx, orgUUID, qr)
@@ -118,7 +160,17 @@ func (m *module) GetTopOperations(ctx context.Context, orgUUID valuer.UUID, req
return nil, err
}
items := m.mapTopOpsQueryRangeResp(resp)
var items []servicetypesv1.OperationItem
if useSpanMetrics {
items = m.mapSpanMetricsTopOpsResp(resp)
// Apply limit after merging multiple queries
if req.Limit > 0 && len(items) > req.Limit {
items = items[:req.Limit]
}
} else {
items = m.mapTopOpsQueryRangeResp(resp)
}
return items, nil
}
@@ -128,9 +180,23 @@ func (m *module) GetEntryPointOperations(ctx context.Context, orgUUID valuer.UUI
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
qr, err := m.buildEntryPointOpsQueryRangeRequest(req)
if err != nil {
return nil, err
var (
qr *qbtypes.QueryRangeRequest
err error
)
evalCtx := featuretypes.NewFlaggerEvaluationContext(orgUUID)
useSpanMetrics := m.Flagger.BooleanOrEmpty(ctx, flagger.FeatureUseSpanMetrics, evalCtx)
if useSpanMetrics {
qr, err = m.buildSpanMetricsEntryPointOpsQueryRangeRequest(req)
if err != nil {
return nil, err
}
} else {
qr, err = m.buildEntryPointOpsQueryRangeRequest(req)
if err != nil {
return nil, err
}
}
resp, err := m.executeQuery(ctx, orgUUID, qr)
@@ -138,10 +204,36 @@ func (m *module) GetEntryPointOperations(ctx context.Context, orgUUID valuer.UUI
return nil, err
}
items := m.mapEntryPointOpsQueryRangeResp(resp)
var items []servicetypesv1.OperationItem
if useSpanMetrics {
items = m.mapSpanMetricsEntryPointOpsResp(resp)
// Apply limit after merging multiple queries
if req.Limit > 0 && len(items) > req.Limit {
items = items[:req.Limit]
}
} else {
items = m.mapEntryPointOpsQueryRangeResp(resp)
}
return items, nil
}
// executeQuery hands the prepared query-range request to the module's Querier
// for the given organisation and returns its response and error untouched.
func (m *module) executeQuery(ctx context.Context, orgUUID valuer.UUID, qr *qbtypes.QueryRangeRequest) (*qbtypes.QueryRangeResponse, error) {
	response, queryErr := m.Querier.QueryRange(ctx, orgUUID, qr)
	return response, queryErr
}
// attachTopLevelOps looks up the top-level operations of the given services
// and applies them to items via applyOpsToItems.
//
// startMs is a Unix timestamp in milliseconds; it is converted to a UTC
// time.Time and passed to FetchTopLevelOperations. A fetch failure is returned
// as-is and leaves items untouched.
func (m *module) attachTopLevelOps(ctx context.Context, serviceNames []string, startMs uint64, items []*servicetypesv1.ResponseItem) error {
	since := time.UnixMilli(int64(startMs)).UTC()

	topLevelOps, fetchErr := m.FetchTopLevelOperations(ctx, since, serviceNames)
	if fetchErr != nil {
		return fetchErr
	}

	applyOpsToItems(items, topLevelOps)
	return nil
}
// buildQueryRangeRequest constructs the QBv5 QueryRangeRequest and computes the time window.
func (m *module) buildQueryRangeRequest(req *servicetypesv1.Request) (*qbtypes.QueryRangeRequest, uint64, uint64, error) {
// Parse start/end (nanoseconds) from strings and convert to milliseconds for QBv5
@@ -211,11 +303,6 @@ func (m *module) buildQueryRangeRequest(req *servicetypesv1.Request) (*qbtypes.Q
return &reqV5, startMs, endMs, nil
}
// executeQuery calls the underlying Querier with the provided request.
// ctx, orgUUID and qr are forwarded unchanged to m.Querier.QueryRange, and its
// response and error are returned as-is.
func (m *module) executeQuery(ctx context.Context, orgUUID valuer.UUID, qr *qbtypes.QueryRangeRequest) (*qbtypes.QueryRangeResponse, error) {
return m.Querier.QueryRange(ctx, orgUUID, qr)
}
// mapQueryRangeRespToServices converts the raw query response into service items and collected service names.
func (m *module) mapQueryRangeRespToServices(resp *qbtypes.QueryRangeResponse, startMs, endMs uint64) ([]*servicetypesv1.ResponseItem, []string) {
if resp == nil || len(resp.Data.Results) == 0 { // no rows
@@ -241,7 +328,7 @@ func (m *module) mapQueryRangeRespToServices(resp *qbtypes.QueryRangeResponse, s
}
}
periodSeconds := float64((endMs - startMs) / 1000)
periodSeconds := float64(endMs-startMs) / 1000.0
out := make([]*servicetypesv1.ResponseItem, 0, len(sd.Data))
serviceNames := make([]string, 0, len(sd.Data))
@@ -285,15 +372,211 @@ func (m *module) mapQueryRangeRespToServices(resp *qbtypes.QueryRangeResponse, s
return out, serviceNames
}
// attachTopLevelOps fetches top-level ops from TelemetryStore and attaches them to items.
func (m *module) attachTopLevelOps(ctx context.Context, serviceNames []string, startMs uint64, items []*servicetypesv1.ResponseItem) error {
startTime := time.UnixMilli(int64(startMs)).UTC()
opsMap, err := m.FetchTopLevelOperations(ctx, startTime, serviceNames)
// buildSpanMetricsQueryRangeRequest constructs span-metrics queries for services.
func (m *module) buildSpanMetricsQueryRangeRequest(req *servicetypesv1.Request) (*qbtypes.QueryRangeRequest, uint64, uint64, error) {
// base filters from request
// Parse start/end (nanoseconds) from strings and convert to milliseconds for QBv5
startNs, err := strconv.ParseUint(req.Start, 10, 64)
if err != nil {
return err
return nil, 0, 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid start time: %v", err)
}
applyOpsToItems(items, opsMap)
return nil
endNs, err := strconv.ParseUint(req.End, 10, 64)
if err != nil {
return nil, 0, 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid end time: %v", err)
}
if startNs >= endNs {
return nil, 0, 0, errors.NewInvalidInputf(errors.CodeInvalidInput, "start must be before end")
}
if err := validateTagFilterItems(req.Tags); err != nil {
return nil, 0, 0, err
}
startMs := startNs / 1_000_000
endMs := endNs / 1_000_000
filterExpr, variables := buildFilterExpression(req.Tags)
// enforce top-level scope via synthetic field
scopeExpr := "isTopLevelOperation = 'true'"
if filterExpr != "" {
filterExpr = "(" + filterExpr + ") AND (" + scopeExpr + ")"
} else {
filterExpr = scopeExpr
}
// Build error filter for num_errors query
var errorFilterExpr string
if filterExpr != "" {
errorFilterExpr = "(" + filterExpr + ") AND (status.code = 'STATUS_CODE_ERROR')"
} else {
errorFilterExpr = "status.code = 'STATUS_CODE_ERROR'"
}
// common groupBy on service.name
groupByService := []qbtypes.GroupByKey{
{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "service.name",
FieldContext: telemetrytypes.FieldContextAttribute, // aligns with working payload
FieldDataType: telemetrytypes.FieldDataTypeString,
Materialized: true,
}},
}
queries := []qbtypes.QueryEnvelope{
{Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "p99_latency",
Signal: telemetrytypes.SignalMetrics,
Filter: &qbtypes.Filter{Expression: filterExpr},
GroupBy: groupByService,
StepInterval: spanMetricsSingleBucketStep(startMs, endMs),
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "signoz_latency.bucket",
Temporality: metrictypes.Delta,
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationPercentile99,
ReduceTo: qbtypes.ReduceToAvg,
},
},
},
},
{Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "num_calls",
Signal: telemetrytypes.SignalMetrics,
Filter: &qbtypes.Filter{Expression: filterExpr},
GroupBy: groupByService,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "signoz_calls_total",
Temporality: metrictypes.Delta,
TimeAggregation: metrictypes.TimeAggregationIncrease,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToSum,
},
},
},
},
{Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "num_errors",
Signal: telemetrytypes.SignalMetrics,
Filter: &qbtypes.Filter{Expression: errorFilterExpr},
GroupBy: groupByService,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "signoz_calls_total",
Temporality: metrictypes.Delta,
TimeAggregation: metrictypes.TimeAggregationIncrease,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToSum,
},
},
},
},
}
reqV5 := qbtypes.QueryRangeRequest{
Start: startMs,
End: endMs,
RequestType: qbtypes.RequestTypeScalar,
Variables: variables,
CompositeQuery: qbtypes.CompositeQuery{
Queries: queries,
},
NoCache: true,
}
return &reqV5, startMs, endMs, nil
}
// mapSpanMetricsRespToServices merges the span-metrics scalar results into one
// ResponseItem per service, keyed by the "service.name" group column.
//
// Each result is matched to a field by its QueryName:
//   - "p99_latency" -> Percentile99 (value multiplied by 1e6 to get the
//     nanoseconds the frontend expects)
//   - "num_calls"   -> total calls, from which CallRate (calls / window
//     seconds) is derived
//   - "num_errors"  -> total errors, from which ErrorRate (percent of calls)
//     is derived
//
// startMs and endMs bound the queried window in milliseconds. When the window
// is empty or inverted the call rate is reported as 0 instead of dividing by
// zero (which would yield +Inf, a value encoding/json refuses to marshal).
//
// It returns the items and the matching service names, both sorted by service
// name so the output is stable across calls. Results that are not scalar data,
// or that lack a service.name group column or an aggregation column, are
// skipped.
func (m *module) mapSpanMetricsRespToServices(resp *qbtypes.QueryRangeResponse, startMs, endMs uint64) ([]*servicetypesv1.ResponseItem, []string) {
	if resp == nil || len(resp.Data.Results) == 0 {
		return []*servicetypesv1.ResponseItem{}, []string{}
	}

	// Only a strictly positive window gives a meaningful rate; this also
	// avoids the uint64 underflow of endMs-startMs on an inverted window.
	periodSeconds := 0.0
	if endMs > startMs {
		periodSeconds = float64(endMs-startMs) / 1000.0
	}

	type agg struct {
		p99Latency float64
		numCalls   float64
		numErrors  float64
	}
	perSvc := make(map[string]*agg)

	// Factor applied to the p99 value to convert it to nanoseconds.
	nanosFactor := math.Pow(10, 6)

	for _, result := range resp.Data.Results {
		sd, ok := result.(*qbtypes.ScalarData)
		if !ok || sd == nil || len(sd.Columns) == 0 {
			continue
		}

		// Locate the service.name column and the first aggregation column
		// for this query.
		serviceNameRespIndex := -1
		aggIdx := -1
		for i, c := range sd.Columns {
			switch c.Type {
			case qbtypes.ColumnTypeGroup:
				if c.Name == "service.name" {
					serviceNameRespIndex = i
				}
			case qbtypes.ColumnTypeAggregation:
				if aggIdx == -1 {
					aggIdx = i
				}
			}
		}
		if serviceNameRespIndex == -1 || aggIdx == -1 {
			continue
		}

		for _, row := range sd.Data {
			// A row shorter than the column list cannot name its service;
			// skip it rather than panic on the index.
			if serviceNameRespIndex >= len(row) {
				continue
			}
			svcName := fmt.Sprintf("%v", row[serviceNameRespIndex])
			a := perSvc[svcName]
			if a == nil {
				a = &agg{}
				perSvc[svcName] = a
			}
			val := toFloat(row, aggIdx)
			switch sd.QueryName {
			case "p99_latency":
				a.p99Latency = val * nanosFactor // convert to nanoseconds because frontend expects this
			case "num_calls":
				a.numCalls = val
			case "num_errors":
				a.numErrors = val
			}
		}
	}

	// Map iteration order is random; sort the names so callers get a
	// deterministic ordering.
	serviceNames := make([]string, 0, len(perSvc))
	for svcName := range perSvc {
		serviceNames = append(serviceNames, svcName)
	}
	sort.Strings(serviceNames)

	out := make([]*servicetypesv1.ResponseItem, 0, len(serviceNames))
	for _, svcName := range serviceNames {
		a := perSvc[svcName]

		errorRate := 0.0
		if a.numCalls > 0 {
			errorRate = a.numErrors * 100 / a.numCalls
		}

		callRate := 0.0
		if a.numCalls > 0 && periodSeconds > 0 {
			callRate = a.numCalls / periodSeconds
		}

		out = append(out, &servicetypesv1.ResponseItem{
			ServiceName:  svcName,
			Percentile99: a.p99Latency,
			CallRate:     callRate,
			ErrorRate:    errorRate,
			DataWarning:  servicetypesv1.DataWarning{TopLevelOps: []string{}},
		})
	}

	return out, serviceNames
}
func (m *module) buildTopOpsQueryRangeRequest(req *servicetypesv1.OperationsRequest) (*qbtypes.QueryRangeRequest, error) {
@@ -404,6 +687,531 @@ func (m *module) mapTopOpsQueryRangeResp(resp *qbtypes.QueryRangeResponse) []ser
return out
}
// buildSpanMetricsTopOpsQueryRangeRequest validates an operations request and
// turns it into a scalar span-metrics query-range request for the top
// operations of a single service.
//
// The request carries five builder queries, all grouped by the "operation"
// attribute: p50/p95/p99 latency over signoz_latency.bucket, plus call and
// error counts over signoz_calls_total. req.Start and req.End are parsed as
// nanosecond timestamps and converted to milliseconds for the query window.
// An invalid-input error is returned when validation fails.
func (m *module) buildSpanMetricsTopOpsQueryRangeRequest(req *servicetypesv1.OperationsRequest) (*qbtypes.QueryRangeRequest, error) {
	if req.Service == "" {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "service is required")
	}
	startNs, err := strconv.ParseUint(req.Start, 10, 64)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid start time: %v", err)
	}
	endNs, err := strconv.ParseUint(req.End, 10, 64)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid end time: %v", err)
	}
	if startNs >= endNs {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "start must be before end")
	}
	if req.Limit < 1 || req.Limit > 5000 {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
	}
	if err := validateTagFilterItems(req.Tags); err != nil {
		return nil, err
	}

	startMs, endMs := startNs/1_000_000, endNs/1_000_000

	// The service restriction always comes first, followed by the caller's tags.
	tags := make([]servicetypesv1.TagFilterItem, 0, len(req.Tags)+1)
	tags = append(tags, servicetypesv1.TagFilterItem{
		Key:          "service.name",
		Operator:     "in",
		StringValues: []string{req.Service},
	})
	tags = append(tags, req.Tags...)
	filterExpr, variables := buildFilterExpression(tags)

	// The error-count query uses the same filter narrowed to error spans.
	errorFilterExpr := "status.code = 'STATUS_CODE_ERROR'"
	if filterExpr != "" {
		errorFilterExpr = "(" + filterExpr + ") AND (status.code = 'STATUS_CODE_ERROR')"
	}

	// Every query is grouped by operation.
	groupByOperation := []qbtypes.GroupByKey{
		{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
			Name:          "operation",
			FieldContext:  telemetrytypes.FieldContextAttribute,
			FieldDataType: telemetrytypes.FieldDataTypeString,
			Materialized:  true,
		}},
	}

	// Aggregation templates; the percentile variants differ only in their
	// space aggregation.
	p50 := qbtypes.MetricAggregation{
		MetricName:       "signoz_latency.bucket",
		Temporality:      metrictypes.Delta,
		TimeAggregation:  metrictypes.TimeAggregationRate,
		SpaceAggregation: metrictypes.SpaceAggregationPercentile50,
		ReduceTo:         qbtypes.ReduceToAvg,
	}
	p95, p99 := p50, p50
	p95.SpaceAggregation = metrictypes.SpaceAggregationPercentile95
	p99.SpaceAggregation = metrictypes.SpaceAggregationPercentile99
	callCount := qbtypes.MetricAggregation{
		MetricName:       "signoz_calls_total",
		Temporality:      metrictypes.Delta,
		TimeAggregation:  metrictypes.TimeAggregationIncrease,
		SpaceAggregation: metrictypes.SpaceAggregationSum,
		ReduceTo:         qbtypes.ReduceToSum,
	}

	// latencyQuery builds a percentile query with an explicit step interval.
	latencyQuery := func(name string, aggregation qbtypes.MetricAggregation) qbtypes.QueryEnvelope {
		return qbtypes.QueryEnvelope{
			Type: qbtypes.QueryTypeBuilder,
			Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
				Name:         name,
				Signal:       telemetrytypes.SignalMetrics,
				Filter:       &qbtypes.Filter{Expression: filterExpr},
				GroupBy:      groupByOperation,
				StepInterval: spanMetricsSingleBucketStep(startMs, endMs),
				Aggregations: []qbtypes.MetricAggregation{aggregation},
			},
		}
	}
	// countQuery builds a call-count query; no step interval is set for these.
	countQuery := func(name, expression string) qbtypes.QueryEnvelope {
		return qbtypes.QueryEnvelope{
			Type: qbtypes.QueryTypeBuilder,
			Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
				Name:         name,
				Signal:       telemetrytypes.SignalMetrics,
				Filter:       &qbtypes.Filter{Expression: expression},
				GroupBy:      groupByOperation,
				Aggregations: []qbtypes.MetricAggregation{callCount},
			},
		}
	}

	return &qbtypes.QueryRangeRequest{
		Start:       startMs,
		End:         endMs,
		RequestType: qbtypes.RequestTypeScalar,
		Variables:   variables,
		CompositeQuery: qbtypes.CompositeQuery{
			Queries: []qbtypes.QueryEnvelope{
				latencyQuery("p50_latency", p50),
				latencyQuery("p95_latency", p95),
				latencyQuery("p99_latency", p99),
				countQuery("num_calls", filterExpr),
				countQuery("num_errors", errorFilterExpr),
			},
		},
		NoCache: true,
	}, nil
}
// mapSpanMetricsTopOpsResp merges the span-metrics scalar results into one
// OperationItem per operation name, using each result's QueryName
// ("p50_latency", "p95_latency", "p99_latency", "num_calls", "num_errors") to
// decide which field a value feeds.
//
// The returned slice is never nil and is ordered by P99 descending; operations
// with equal P99 are ordered by name so the output does not depend on map
// iteration order.
func (m *module) mapSpanMetricsTopOpsResp(resp *qbtypes.QueryRangeResponse) []servicetypesv1.OperationItem {
	if resp == nil || len(resp.Data.Results) == 0 {
		return []servicetypesv1.OperationItem{}
	}

	// Latency values are scaled by 10^6: milliseconds to nanoseconds.
	latencyScale := math.Pow(10, 6)

	// Aggregates collected per operation across all query results.
	type agg struct {
		p50       float64
		p95       float64
		p99       float64
		numCalls  uint64
		numErrors uint64
	}
	perOp := make(map[string]*agg)

	// Each query comes back as its own ScalarData result.
	for _, result := range resp.Data.Results {
		sd, ok := result.(*qbtypes.ScalarData)
		if !ok || sd == nil {
			continue
		}
		if len(sd.Columns) == 0 || len(sd.Data) == 0 {
			continue
		}

		// Locate the operation group column and the first aggregation column.
		operationIdx := -1
		aggIdx := -1
		for i, c := range sd.Columns {
			switch c.Type {
			case qbtypes.ColumnTypeGroup:
				if c.Name == "operation" {
					operationIdx = i
				}
			case qbtypes.ColumnTypeAggregation:
				if aggIdx == -1 {
					aggIdx = i
				}
			}
		}
		if operationIdx == -1 || aggIdx == -1 {
			continue
		}

		for _, row := range sd.Data {
			// Ignore rows that are shorter than the column list.
			if len(row) <= operationIdx || len(row) <= aggIdx {
				continue
			}
			opName := fmt.Sprintf("%v", row[operationIdx])
			a := perOp[opName]
			if a == nil {
				a = &agg{}
				perOp[opName] = a
			}
			switch sd.QueryName {
			case "p50_latency":
				a.p50 = toFloat(row, aggIdx) * latencyScale
			case "p95_latency":
				a.p95 = toFloat(row, aggIdx) * latencyScale
			case "p99_latency":
				a.p99 = toFloat(row, aggIdx) * latencyScale
			case "num_calls":
				a.numCalls = uint64(toFloat(row, aggIdx))
			case "num_errors":
				a.numErrors = uint64(toFloat(row, aggIdx))
			}
		}
	}

	if len(perOp) == 0 {
		return []servicetypesv1.OperationItem{}
	}

	out := make([]servicetypesv1.OperationItem, 0, len(perOp))
	for opName, a := range perOp {
		out = append(out, servicetypesv1.OperationItem{
			Name:       opName,
			P50:        a.p50,
			P95:        a.p95,
			P99:        a.p99,
			NumCalls:   a.numCalls,
			ErrorCount: a.numErrors,
		})
	}

	// sort.Slice is not stable and the items come from a map, so break P99
	// ties on the (unique) operation name to keep the order reproducible.
	sort.Slice(out, func(i, j int) bool {
		if out[i].P99 != out[j].P99 {
			return out[i].P99 > out[j].P99
		}
		return out[i].Name < out[j].Name
	})
	return out
}
// buildSpanMetricsEntryPointOpsQueryRangeRequest validates an operations
// request and turns it into a scalar span-metrics query-range request for the
// entry point operations of a single service.
//
// It mirrors buildSpanMetricsTopOpsQueryRangeRequest but additionally restricts
// every query with the synthetic isTopLevelOperation = 'true' filter. The
// request carries five builder queries, all grouped by the "operation"
// attribute: p50/p95/p99 latency over signoz_latency.bucket, plus call and
// error counts over signoz_calls_total. req.Start and req.End are parsed as
// nanosecond timestamps and converted to milliseconds for the query window.
// An invalid-input error is returned when validation fails.
func (m *module) buildSpanMetricsEntryPointOpsQueryRangeRequest(req *servicetypesv1.OperationsRequest) (*qbtypes.QueryRangeRequest, error) {
	if req.Service == "" {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "service is required")
	}
	startNs, err := strconv.ParseUint(req.Start, 10, 64)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid start time: %v", err)
	}
	endNs, err := strconv.ParseUint(req.End, 10, 64)
	if err != nil {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid end time: %v", err)
	}
	if startNs >= endNs {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "start must be before end")
	}
	if req.Limit < 1 || req.Limit > 5000 {
		return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
	}
	if err := validateTagFilterItems(req.Tags); err != nil {
		return nil, err
	}

	startMs := startNs / 1_000_000
	endMs := endNs / 1_000_000

	// Build service filter
	serviceTag := servicetypesv1.TagFilterItem{
		Key:          "service.name",
		Operator:     "in",
		StringValues: []string{req.Service},
	}
	tags := append([]servicetypesv1.TagFilterItem{serviceTag}, req.Tags...)
	filterExpr, variables := buildFilterExpression(tags)

	// Enforce top-level scope via synthetic field (entry point operations only)
	scopeExpr := "isTopLevelOperation = 'true'"
	if filterExpr != "" {
		filterExpr = "(" + filterExpr + ") AND (" + scopeExpr + ")"
	} else {
		filterExpr = scopeExpr
	}

	// filterExpr always contains at least the scope expression at this point,
	// so the error filter can wrap it unconditionally.
	errorFilterExpr := "(" + filterExpr + ") AND (status.code = 'STATUS_CODE_ERROR')"

	// Common groupBy on operation
	groupByOperation := []qbtypes.GroupByKey{
		{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
			Name:          "operation",
			FieldContext:  telemetrytypes.FieldContextAttribute,
			FieldDataType: telemetrytypes.FieldDataTypeString,
			Materialized:  true,
		}},
	}

	// Aggregation templates; the percentile variants differ only in their
	// space aggregation.
	p50 := qbtypes.MetricAggregation{
		MetricName:       "signoz_latency.bucket",
		Temporality:      metrictypes.Delta,
		TimeAggregation:  metrictypes.TimeAggregationRate,
		SpaceAggregation: metrictypes.SpaceAggregationPercentile50,
		ReduceTo:         qbtypes.ReduceToAvg,
	}
	p95, p99 := p50, p50
	p95.SpaceAggregation = metrictypes.SpaceAggregationPercentile95
	p99.SpaceAggregation = metrictypes.SpaceAggregationPercentile99
	callCount := qbtypes.MetricAggregation{
		MetricName:       "signoz_calls_total",
		Temporality:      metrictypes.Delta,
		TimeAggregation:  metrictypes.TimeAggregationIncrease,
		SpaceAggregation: metrictypes.SpaceAggregationSum,
		ReduceTo:         qbtypes.ReduceToSum,
	}

	// latencyQuery builds a percentile query with an explicit step interval.
	latencyQuery := func(name string, aggregation qbtypes.MetricAggregation) qbtypes.QueryEnvelope {
		return qbtypes.QueryEnvelope{
			Type: qbtypes.QueryTypeBuilder,
			Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
				Name:         name,
				Signal:       telemetrytypes.SignalMetrics,
				Filter:       &qbtypes.Filter{Expression: filterExpr},
				GroupBy:      groupByOperation,
				StepInterval: spanMetricsSingleBucketStep(startMs, endMs),
				Aggregations: []qbtypes.MetricAggregation{aggregation},
			},
		}
	}
	// countQuery builds a call-count query; no step interval is set for these.
	countQuery := func(name, expression string) qbtypes.QueryEnvelope {
		return qbtypes.QueryEnvelope{
			Type: qbtypes.QueryTypeBuilder,
			Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
				Name:         name,
				Signal:       telemetrytypes.SignalMetrics,
				Filter:       &qbtypes.Filter{Expression: expression},
				GroupBy:      groupByOperation,
				Aggregations: []qbtypes.MetricAggregation{callCount},
			},
		}
	}

	reqV5 := qbtypes.QueryRangeRequest{
		Start:       startMs,
		End:         endMs,
		RequestType: qbtypes.RequestTypeScalar,
		Variables:   variables,
		CompositeQuery: qbtypes.CompositeQuery{
			Queries: []qbtypes.QueryEnvelope{
				latencyQuery("p50_latency", p50),
				latencyQuery("p95_latency", p95),
				latencyQuery("p99_latency", p99),
				countQuery("num_calls", filterExpr),
				countQuery("num_errors", errorFilterExpr),
			},
		},
		NoCache: true,
	}
	return &reqV5, nil
}
// mapSpanMetricsEntryPointOpsResp merges the span-metrics scalar results for
// entry point operations into one OperationItem per operation name, using each
// result's QueryName ("p50_latency", "p95_latency", "p99_latency",
// "num_calls", "num_errors") to decide which field a value feeds.
//
// The returned slice is never nil and is ordered by P99 descending; operations
// with equal P99 are ordered by name so the output does not depend on map
// iteration order.
func (m *module) mapSpanMetricsEntryPointOpsResp(resp *qbtypes.QueryRangeResponse) []servicetypesv1.OperationItem {
	if resp == nil || len(resp.Data.Results) == 0 {
		return []servicetypesv1.OperationItem{}
	}

	// Latency values are scaled by 10^6, i.e. milliseconds to nanoseconds
	// (a seconds-to-nanoseconds conversion would need 10^9).
	latencyScale := math.Pow(10, 6)

	// Aggregates collected per operation across all query results.
	type agg struct {
		p50       float64
		p95       float64
		p99       float64
		numCalls  uint64
		numErrors uint64
	}
	perOp := make(map[string]*agg)

	// Each query comes back as its own ScalarData result.
	for _, result := range resp.Data.Results {
		sd, ok := result.(*qbtypes.ScalarData)
		if !ok || sd == nil {
			continue
		}
		if len(sd.Columns) == 0 || len(sd.Data) == 0 {
			continue
		}

		// Locate the operation group column and the first aggregation column.
		operationIdx := -1
		aggIdx := -1
		for i, c := range sd.Columns {
			switch c.Type {
			case qbtypes.ColumnTypeGroup:
				if c.Name == "operation" {
					operationIdx = i
				}
			case qbtypes.ColumnTypeAggregation:
				if aggIdx == -1 {
					aggIdx = i
				}
			}
		}
		if operationIdx == -1 || aggIdx == -1 {
			continue
		}

		for _, row := range sd.Data {
			// Ignore rows that are shorter than the column list.
			if len(row) <= operationIdx || len(row) <= aggIdx {
				continue
			}
			opName := fmt.Sprintf("%v", row[operationIdx])
			a := perOp[opName]
			if a == nil {
				a = &agg{}
				perOp[opName] = a
			}
			switch sd.QueryName {
			case "p50_latency":
				a.p50 = toFloat(row, aggIdx) * latencyScale
			case "p95_latency":
				a.p95 = toFloat(row, aggIdx) * latencyScale
			case "p99_latency":
				a.p99 = toFloat(row, aggIdx) * latencyScale
			case "num_calls":
				a.numCalls = uint64(toFloat(row, aggIdx))
			case "num_errors":
				a.numErrors = uint64(toFloat(row, aggIdx))
			}
		}
	}

	if len(perOp) == 0 {
		return []servicetypesv1.OperationItem{}
	}

	out := make([]servicetypesv1.OperationItem, 0, len(perOp))
	for opName, a := range perOp {
		out = append(out, servicetypesv1.OperationItem{
			Name:       opName,
			P50:        a.p50,
			P95:        a.p95,
			P99:        a.p99,
			NumCalls:   a.numCalls,
			ErrorCount: a.numErrors,
		})
	}

	// sort.Slice is not stable and the items come from a map, so break P99
	// ties on the (unique) operation name to keep the order reproducible.
	sort.Slice(out, func(i, j int) bool {
		if out[i].P99 != out[j].P99 {
			return out[i].P99 > out[j].P99
		}
		return out[i].Name < out[j].Name
	})
	return out
}
func (m *module) buildEntryPointOpsQueryRangeRequest(req *servicetypesv1.OperationsRequest) (*qbtypes.QueryRangeRequest, error) {
if req.Service == "" {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "service is required")

View File

@@ -40,7 +40,7 @@ func TestNewHandlers(t *testing.T) {
queryParser := queryparser.New(providerSettings)
require.NoError(t, err)
dashboardModule := impldashboard.NewModule(impldashboard.NewStore(sqlstore), providerSettings, nil, orgGetter, queryParser)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, nil, nil, nil, nil, nil, nil, nil, queryParser, Config{}, dashboardModule)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, nil, nil, nil, nil, nil, nil, nil, queryParser, Config{}, dashboardModule, nil)
handlers := NewHandlers(modules, providerSettings, nil, nil, nil, nil, nil)

View File

@@ -8,6 +8,7 @@ import (
"github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/emailing"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/flagger"
"github.com/SigNoz/signoz/pkg/modules/apdex"
"github.com/SigNoz/signoz/pkg/modules/apdex/implapdex"
"github.com/SigNoz/signoz/pkg/modules/authdomain"
@@ -85,6 +86,7 @@ func NewModules(
queryParser queryparser.QueryParser,
config Config,
dashboard dashboard.Module,
flagger flagger.Flagger,
) Modules {
quickfilter := implquickfilter.NewModule(implquickfilter.NewStore(sqlstore))
orgSetter := implorganization.NewSetter(implorganization.NewStore(sqlstore), alertmanager, quickfilter)
@@ -107,7 +109,7 @@ func NewModules(
AuthDomain: implauthdomain.NewModule(implauthdomain.NewStore(sqlstore), authNs),
Session: implsession.NewModule(providerSettings, authNs, user, userGetter, implauthdomain.NewModule(implauthdomain.NewStore(sqlstore), authNs), tokenizer, orgGetter),
SpanPercentile: implspanpercentile.NewModule(querier, providerSettings),
Services: implservices.NewModule(querier, telemetryStore),
Services: implservices.NewModule(querier, telemetryStore, flagger),
MetricsExplorer: implmetricsexplorer.NewModule(telemetryStore, telemetryMetadataStore, cache, ruleStore, dashboard, providerSettings, config.MetricsExplorer),
Promote: implpromote.NewModule(telemetryMetadataStore, telemetryStore),
}

View File

@@ -40,7 +40,7 @@ func TestNewModules(t *testing.T) {
queryParser := queryparser.New(providerSettings)
require.NoError(t, err)
dashboardModule := impldashboard.NewModule(impldashboard.NewStore(sqlstore), providerSettings, nil, orgGetter, queryParser)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, nil, nil, nil, nil, nil, nil, nil, queryParser, Config{}, dashboardModule)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, nil, nil, nil, nil, nil, nil, nil, queryParser, Config{}, dashboardModule, nil)
reflectVal := reflect.ValueOf(modules)
for i := 0; i < reflectVal.NumField(); i++ {

View File

@@ -388,7 +388,7 @@ func New(
// Initialize all modules
roleModule := implrole.NewModule(implrole.NewStore(sqlstore), authz, nil)
dashboardModule := dashboardModuleCallback(sqlstore, providerSettings, analytics, orgGetter, roleModule, queryParser, querier, licensing)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, analytics, querier, telemetrystore, telemetryMetadataStore, authNs, authz, cache, queryParser, config, dashboardModule)
modules := NewModules(sqlstore, tokenizer, emailing, providerSettings, orgGetter, alertmanager, analytics, querier, telemetrystore, telemetryMetadataStore, authNs, authz, cache, queryParser, config, dashboardModule, flagger)
// Initialize all handlers for the modules
handlers := NewHandlers(modules, providerSettings, querier, licensing, global, flagger, gateway)

View File

@@ -698,6 +698,25 @@ func (t *telemetryMetaStore) getMetricsKeys(ctx context.Context, fieldKeySelecto
// hit the limit?
complete := rowCount <= limit
for _, selector := range fieldKeySelectors {
for _, key := range telemetrymetrics.MetricScopeFieldDefinitions {
if selector.Signal != telemetrytypes.SignalUnspecified && selector.Signal != telemetrytypes.SignalMetrics {
continue
}
if selector.FieldContext != telemetrytypes.FieldContextUnspecified && selector.FieldContext != key.FieldContext {
continue
}
if selector.FieldDataType != telemetrytypes.FieldDataTypeUnspecified && selector.FieldDataType != key.FieldDataType {
continue
}
if matchesSelectorName(selector.Name, key.Name, selector.SelectorMatchType) {
keys = append(keys, &key)
break
}
}
}
return keys, complete, nil
}

View File

@@ -4,9 +4,11 @@ import (
"context"
"fmt"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrytraces"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
@@ -139,9 +141,13 @@ func (c *conditionBuilder) ConditionFor(
operator qbtypes.FilterOperator,
value any,
sb *sqlbuilder.SelectBuilder,
_ uint64,
start uint64,
_ uint64,
) (string, error) {
if c.isMetricScopeField(key.Name) {
return c.buildMetricScopeCondition(operator, value, start)
}
condition, err := c.conditionFor(ctx, key, operator, value, sb)
if err != nil {
return "", err
@@ -149,3 +155,37 @@ func (c *conditionBuilder) ConditionFor(
return condition, nil
}
// isMetricScopeField reports whether keyName is the synthetic
// isTopLevelOperation scope field rather than a regular metric label.
func (c *conditionBuilder) isMetricScopeField(keyName string) bool {
	switch keyName {
	case MetricScopeFieldIsTopLevelOperation:
		return true
	default:
		return false
	}
}
// buildMetricScopeCondition renders the SQL predicate for the synthetic
// isTopLevelOperation field on the metrics signal.
//
// Only `isTopLevelOperation = true` is supported: operator must be '=', and
// value must be the bool true or the string "true" (compared
// case-insensitively). Any other operator, value type or value yields an
// invalid-input error.
//
// The predicate keeps rows whose (operation, service.name) labels appear in
// the top-level-operations table with time >= start. start is divided by 1000
// before being passed to toDateTime, so it is presumably epoch milliseconds.
func (c *conditionBuilder) buildMetricScopeCondition(operator qbtypes.FilterOperator, value any, start uint64) (string, error) {
	if operator != qbtypes.FilterOperatorEqual {
		return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "%s only supports '=' operator", MetricScopeFieldIsTopLevelOperation)
	}

	// Accept true in bool or string form; anything else is invalid.
	var isTrue bool
	switch v := value.(type) {
	case bool:
		isTrue = v
	case string:
		isTrue = strings.EqualFold(v, "true")
	default:
		return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "%s expects boolean value, got %T", MetricScopeFieldIsTopLevelOperation, value)
	}
	if !isTrue {
		return "", errors.NewInvalidInputf(errors.CodeInvalidInput, "%s can only be filtered with value 'true'", MetricScopeFieldIsTopLevelOperation)
	}

	startSec := int64(start / 1000)
	return fmt.Sprintf(
		"((JSONExtractString(labels, 'operation'), JSONExtractString(labels, 'service.name')) GLOBAL IN (SELECT DISTINCT name, serviceName FROM %s.%s WHERE time >= toDateTime(%d)))",
		telemetrytraces.DBName,
		telemetrytraces.TopLevelOperationsTableName,
		startSec,
	), nil
}

View File

@@ -36,3 +36,14 @@ var IntrinsicMetricFieldDefinitions = map[string]telemetrytypes.TelemetryFieldKe
// FieldDataType: telemetrytypes.FieldDataTypeBool,
// },
}
// MetricScopeFieldIsTopLevelOperation is the name of the synthetic filter key
// that scopes a metrics query to top-level operations.
const MetricScopeFieldIsTopLevelOperation = "isTopLevelOperation"
// MetricScopeFieldDefinitions holds the field-key definitions of the synthetic
// scope fields for the metrics signal, keyed by field name.
var MetricScopeFieldDefinitions = map[string]telemetrytypes.TelemetryFieldKey{
MetricScopeFieldIsTopLevelOperation: {
Name: MetricScopeFieldIsTopLevelOperation,
Signal: telemetrytypes.SignalMetrics,
FieldContext: telemetrytypes.FieldContextMetric,
FieldDataType: telemetrytypes.FieldDataTypeBool,
},
}

View File

@@ -1,12 +1,13 @@
package telemetrytraces
const (
DBName = "signoz_traces"
SpanIndexV3TableName = "distributed_signoz_index_v3"
SpanIndexV3LocalTableName = "signoz_index_v3"
TagAttributesV2TableName = "distributed_tag_attributes_v2"
TagAttributesV2LocalTableName = "tag_attributes_v2"
TopLevelOperationsTableName = "distributed_top_level_operations"
TraceSummaryTableName = "distributed_trace_summary"
SpanAttributesKeysTblName = "distributed_span_attributes_keys"
DBName = "signoz_traces"
SpanIndexV3TableName = "distributed_signoz_index_v3"
SpanIndexV3LocalTableName = "signoz_index_v3"
TagAttributesV2TableName = "distributed_tag_attributes_v2"
TagAttributesV2LocalTableName = "tag_attributes_v2"
TopLevelOperationsTableName = "distributed_top_level_operations"
LocalTopLevelOperationsTableName = "top_level_operations"
TraceSummaryTableName = "distributed_trace_summary"
SpanAttributesKeysTblName = "distributed_span_attributes_keys"
)