Compare commits

...

1 Commits

Author SHA1 Message Date
nikhilmantri0902
78b9ce6d4f chore: v2 clusters list api 2026-04-28 18:31:56 +05:30
12 changed files with 1218 additions and 0 deletions

View File

@@ -2396,6 +2396,79 @@ components:
enabled:
type: boolean
type: object
InframonitoringtypesClusterRecord:
properties:
clusterCPU:
format: double
type: number
clusterCPUAllocatable:
format: double
type: number
clusterMemory:
format: double
type: number
clusterMemoryAllocatable:
format: double
type: number
clusterName:
type: string
failedPodCount:
type: integer
meta:
additionalProperties: {}
nullable: true
type: object
notReadyNodesCount:
type: integer
pendingPodCount:
type: integer
readyNodesCount:
type: integer
runningPodCount:
type: integer
succeededPodCount:
type: integer
unknownPodCount:
type: integer
required:
- clusterName
- clusterCPU
- clusterCPUAllocatable
- clusterMemory
- clusterMemoryAllocatable
- readyNodesCount
- notReadyNodesCount
- pendingPodCount
- runningPodCount
- succeededPodCount
- failedPodCount
- unknownPodCount
- meta
type: object
InframonitoringtypesClusters:
properties:
endTimeBeforeRetention:
type: boolean
records:
items:
$ref: '#/components/schemas/InframonitoringtypesClusterRecord'
nullable: true
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
$ref: '#/components/schemas/InframonitoringtypesResponseType'
warning:
$ref: '#/components/schemas/Querybuildertypesv5QueryWarnData'
required:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesHostFilter:
properties:
expression:
@@ -2691,6 +2764,32 @@ components:
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesPostableClusters:
properties:
end:
format: int64
type: integer
filter:
$ref: '#/components/schemas/Querybuildertypesv5Filter'
groupBy:
items:
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
nullable: true
type: array
limit:
type: integer
offset:
type: integer
orderBy:
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
start:
format: int64
type: integer
required:
- start
- end
- limit
type: object
InframonitoringtypesPostableHosts:
properties:
end:
@@ -11169,6 +11268,78 @@ paths:
summary: Health check
tags:
- health
/api/v2/infra_monitoring/clusters:
post:
deprecated: false
description: 'Returns a paginated list of Kubernetes clusters with key aggregated
metrics derived by summing per-node values within the group: CPU usage, CPU
allocatable, memory working set, memory allocatable. Each row also reports
per-group node counts bucketed by each node''s latest k8s.node.condition_ready
value (readyNodesCount, notReadyNodesCount) and per-group pod counts bucketed
by each pod''s latest k8s.pod.phase value (pendingPodCount, runningPodCount,
succeededPodCount, failedPodCount, unknownPodCount). Each cluster includes
metadata attributes (k8s.cluster.name). The response type is ''list'' for
the default k8s.cluster.name grouping or ''grouped_list'' for custom groupBy
keys; in both modes every row aggregates nodes and pods in the group. Supports
filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable
/ memory / memory_allocatable, and pagination via offset/limit. Also reports
missing required metrics and whether the requested time range falls before
the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable,
clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data
is available for that field.'
operationId: ListClusters
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InframonitoringtypesPostableClusters'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/InframonitoringtypesClusters'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: List Clusters for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/hosts:
post:
deprecated: false

View File

@@ -12,10 +12,12 @@ import type {
} from 'react-query';
import type {
InframonitoringtypesPostableClustersDTO,
InframonitoringtypesPostableHostsDTO,
InframonitoringtypesPostableNamespacesDTO,
InframonitoringtypesPostableNodesDTO,
InframonitoringtypesPostablePodsDTO,
ListClusters200,
ListHosts200,
ListNamespaces200,
ListNodes200,
@@ -26,6 +28,90 @@ import type {
import { GeneratedAPIInstance } from '../../../generatedAPIInstance';
import type { ErrorType, BodyType } from '../../../generatedAPIInstance';
/**
* Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group node counts bucketed by each node's latest k8s.node.condition_ready value (readyNodesCount, notReadyNodesCount) and per-group pod counts bucketed by each pod's latest k8s.pod.phase value (pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* @summary List Clusters for Infra Monitoring
*/
export const listClusters = (
inframonitoringtypesPostableClustersDTO: BodyType<InframonitoringtypesPostableClustersDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<ListClusters200>({
url: `/api/v2/infra_monitoring/clusters`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: inframonitoringtypesPostableClustersDTO,
signal,
});
};
export const getListClustersMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listClusters>>,
TError,
{ data: BodyType<InframonitoringtypesPostableClustersDTO> },
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof listClusters>>,
TError,
{ data: BodyType<InframonitoringtypesPostableClustersDTO> },
TContext
> => {
const mutationKey = ['listClusters'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof listClusters>>,
{ data: BodyType<InframonitoringtypesPostableClustersDTO> }
> = (props) => {
const { data } = props ?? {};
return listClusters(data);
};
return { mutationFn, ...mutationOptions };
};
export type ListClustersMutationResult = NonNullable<
Awaited<ReturnType<typeof listClusters>>
>;
export type ListClustersMutationBody =
BodyType<InframonitoringtypesPostableClustersDTO>;
export type ListClustersMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary List Clusters for Infra Monitoring
*/
export const useListClusters = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listClusters>>,
TError,
{ data: BodyType<InframonitoringtypesPostableClustersDTO> },
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof listClusters>>,
TError,
{ data: BodyType<InframonitoringtypesPostableClustersDTO> },
TContext
> => {
const mutationOptions = getListClustersMutationOptions(options);
return useMutation(mutationOptions);
};
/**
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
* @summary List Hosts for Infra Monitoring

View File

@@ -3158,6 +3158,92 @@ export interface GlobaltypesTokenizerConfigDTO {
enabled?: boolean;
}
/**
* @nullable
*/
export type InframonitoringtypesClusterRecordDTOMeta = {
[key: string]: unknown;
} | null;
export interface InframonitoringtypesClusterRecordDTO {
/**
* @type number
* @format double
*/
clusterCPU: number;
/**
* @type number
* @format double
*/
clusterCPUAllocatable: number;
/**
* @type number
* @format double
*/
clusterMemory: number;
/**
* @type number
* @format double
*/
clusterMemoryAllocatable: number;
/**
* @type string
*/
clusterName: string;
/**
* @type integer
*/
failedPodCount: number;
/**
* @type object
* @nullable true
*/
meta: InframonitoringtypesClusterRecordDTOMeta;
/**
* @type integer
*/
notReadyNodesCount: number;
/**
* @type integer
*/
pendingPodCount: number;
/**
* @type integer
*/
readyNodesCount: number;
/**
* @type integer
*/
runningPodCount: number;
/**
* @type integer
*/
succeededPodCount: number;
/**
* @type integer
*/
unknownPodCount: number;
}
export interface InframonitoringtypesClustersDTO {
/**
* @type boolean
*/
endTimeBeforeRetention: boolean;
/**
* @type array
* @nullable true
*/
records: InframonitoringtypesClusterRecordDTO[] | null;
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
total: number;
type: InframonitoringtypesResponseTypeDTO;
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export interface InframonitoringtypesHostFilterDTO {
/**
* @type string
@@ -3485,6 +3571,34 @@ export interface InframonitoringtypesPodsDTO {
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export interface InframonitoringtypesPostableClustersDTO {
/**
* @type integer
* @format int64
*/
end: number;
filter?: Querybuildertypesv5FilterDTO;
/**
* @type array
* @nullable true
*/
groupBy?: Querybuildertypesv5GroupByKeyDTO[] | null;
/**
* @type integer
*/
limit: number;
/**
* @type integer
*/
offset?: number;
orderBy?: Querybuildertypesv5OrderByDTO;
/**
* @type integer
* @format int64
*/
start: number;
}
export interface InframonitoringtypesPostableHostsDTO {
/**
* @type integer
@@ -7668,6 +7782,14 @@ export type Healthz503 = {
status: string;
};
export type ListClusters200 = {
data: InframonitoringtypesClustersDTO;
/**
* @type string
*/
status: string;
};
export type ListHosts200 = {
data: InframonitoringtypesHostsDTO;
/**

View File

@@ -86,5 +86,24 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v2/infra_monitoring/clusters", handler.New(
provider.authZ.ViewAccess(provider.infraMonitoringHandler.ListClusters),
handler.OpenAPIDef{
ID: "ListClusters",
Tags: []string{"inframonitoring"},
Summary: "List Clusters for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group node counts bucketed by each node's latest k8s.node.condition_ready value (readyNodesCount, notReadyNodesCount) and per-group pod counts bucketed by each pod's latest k8s.pod.phase value (pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableClusters),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Clusters),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
Deprecated: false,
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodPost).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -0,0 +1,136 @@
package implinframonitoring
import (
"context"
"slices"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/valuer"
)
// buildClusterRecords assembles the page records. Node condition counts and
// pod phase counts come from the respective per-group maps in both modes;
// every row is a group of nodes+pods, so there's no per-row "current state"
// concept (analogous to namespaces).
func buildClusterRecords(
resp *qbtypes.QueryRangeResponse,
pageGroups []map[string]string,
groupBy []qbtypes.GroupByKey,
metadataMap map[string]map[string]string,
nodeConditionCountsMap map[string]nodeConditionCounts,
podPhaseCountsMap map[string]podPhaseCounts,
) []inframonitoringtypes.ClusterRecord {
metricsMap := parseFullQueryResponse(resp, groupBy)
records := make([]inframonitoringtypes.ClusterRecord, 0, len(pageGroups))
for _, labels := range pageGroups {
compositeKey := compositeKeyFromLabels(labels, groupBy)
clusterName := labels[clusterNameAttrKey]
record := inframonitoringtypes.ClusterRecord{ // initialize with default values
ClusterName: clusterName,
ClusterCPU: -1,
ClusterCPUAllocatable: -1,
ClusterMemory: -1,
ClusterMemoryAllocatable: -1,
Meta: map[string]any{},
}
if metrics, ok := metricsMap[compositeKey]; ok {
if v, exists := metrics["A"]; exists {
record.ClusterCPU = v
}
if v, exists := metrics["B"]; exists {
record.ClusterCPUAllocatable = v
}
if v, exists := metrics["C"]; exists {
record.ClusterMemory = v
}
if v, exists := metrics["D"]; exists {
record.ClusterMemoryAllocatable = v
}
}
if conditionCountsForGroup, ok := nodeConditionCountsMap[compositeKey]; ok {
record.ReadyNodesCount = conditionCountsForGroup.Ready
record.NotReadyNodesCount = conditionCountsForGroup.NotReady
}
if phaseCountsForGroup, ok := podPhaseCountsMap[compositeKey]; ok {
record.PendingPodCount = phaseCountsForGroup.Pending
record.RunningPodCount = phaseCountsForGroup.Running
record.SucceededPodCount = phaseCountsForGroup.Succeeded
record.FailedPodCount = phaseCountsForGroup.Failed
record.UnknownPodCount = phaseCountsForGroup.Unknown
}
if attrs, ok := metadataMap[compositeKey]; ok {
for k, v := range attrs {
record.Meta[k] = v
}
}
records = append(records, record)
}
return records
}
func (m *module) getTopClusterGroups(
ctx context.Context,
orgID valuer.UUID,
req *inframonitoringtypes.PostableClusters,
metadataMap map[string]map[string]string,
) ([]map[string]string, error) {
orderByKey := req.OrderBy.Key.Name
queryNamesForOrderBy := orderByToClustersQueryNames[orderByKey]
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
topReq := &qbtypes.QueryRangeRequest{
Start: uint64(req.Start),
End: uint64(req.End),
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
},
}
for _, envelope := range m.newClustersTableListQuery().CompositeQuery.Queries {
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
continue
}
copied := envelope
if copied.Type == qbtypes.QueryTypeBuilder {
existingExpr := ""
if f := copied.GetFilter(); f != nil {
existingExpr = f.Expression
}
reqFilterExpr := ""
if req.Filter != nil {
reqFilterExpr = req.Filter.Expression
}
merged := mergeFilterExpressions(existingExpr, reqFilterExpr)
copied.SetFilter(&qbtypes.Filter{Expression: merged})
copied.SetGroupBy(req.GroupBy)
}
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
}
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
if err != nil {
return nil, err
}
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
}
func (m *module) getClustersTableMetadata(ctx context.Context, req *inframonitoringtypes.PostableClusters) (map[string]map[string]string, error) {
var nonGroupByAttrs []string
for _, key := range clusterAttrKeysForMetadata {
if !isKeyInGroupByAttrs(req.GroupBy, key) {
nonGroupByAttrs = append(nonGroupByAttrs, key)
}
}
return m.getMetadata(ctx, clustersTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
}

View File

@@ -0,0 +1,140 @@
package implinframonitoring
import (
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
// TODO(nikhilmantri0902): change to k8s.cluster.uid after showing the missing
// data banner. Carried forward from v1 (see k8sClusterUIDAttrKey in
// pkg/query-service/app/inframetrics/clusters.go).
const clusterNameAttrKey = "k8s.cluster.name"
var clusterNameGroupByKey = qbtypes.GroupByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: clusterNameAttrKey,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
}
// clustersTableMetricNamesList drives the existence/retention check.
// Includes k8s.node.condition_ready and k8s.pod.phase so the response
// short-circuits cleanly when a cluster doesn't ship those metrics — even
// though they aren't part of the QB composite query (they're queried separately
// via getPerGroupNodeConditionCounts and getPerGroupPodPhaseCounts).
var clustersTableMetricNamesList = []string{
"k8s.node.cpu.usage",
"k8s.node.allocatable_cpu",
"k8s.node.memory.working_set",
"k8s.node.allocatable_memory",
"k8s.node.condition_ready",
"k8s.pod.phase",
}
var clusterAttrKeysForMetadata = []string{
"k8s.cluster.name",
}
var orderByToClustersQueryNames = map[string][]string{
inframonitoringtypes.ClustersOrderByCPU: {"A"},
inframonitoringtypes.ClustersOrderByCPUAllocatable: {"B"},
inframonitoringtypes.ClustersOrderByMemory: {"C"},
inframonitoringtypes.ClustersOrderByMemoryAllocatable: {"D"},
}
// newClustersTableListQuery builds the composite QB v5 request for the clusters list.
// Cluster-scope metrics are derived by summing per-node metrics within the
// group (default group: k8s.cluster.name). Node condition counts and pod phase
// counts are derived separately via getPerGroupNodeConditionCounts and
// getPerGroupPodPhaseCounts respectively (works for both list and grouped_list
// modes), so neither is included here. Query letters A/B/C/D mirror the v1
// implementation and the v2 nodes list.
func (m *module) newClustersTableListQuery() *qbtypes.QueryRangeRequest {
queries := []qbtypes.QueryEnvelope{
// Query A: CPU usage — sum of node CPU within the group.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "A",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.cpu.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{clusterNameGroupByKey},
Disabled: false,
},
},
// Query B: CPU allocatable — sum of node allocatable CPU within the group.
// TimeAggregationLatest is the closest v5 equivalent of v1's AnyLast;
// allocatable values change rarely so divergence in practice is negligible.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "B",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.allocatable_cpu",
TimeAggregation: metrictypes.TimeAggregationLatest,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{clusterNameGroupByKey},
Disabled: false,
},
},
// Query C: Memory working set — sum of node memory within the group.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "C",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.memory.working_set",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{clusterNameGroupByKey},
Disabled: false,
},
},
// Query D: Memory allocatable — sum of node allocatable memory within the group.
// Same Latest caveat as Query B.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "D",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.allocatable_memory",
TimeAggregation: metrictypes.TimeAggregationLatest,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{clusterNameGroupByKey},
Disabled: false,
},
},
}
return &qbtypes.QueryRangeRequest{
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: queries,
},
}
}

View File

@@ -117,3 +117,27 @@ func (h *handler) ListNamespaces(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusOK, result)
}
func (h *handler) ListClusters(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {
render.Error(rw, err)
return
}
orgID := valuer.MustNewUUID(claims.OrgID)
var parsedReq inframonitoringtypes.PostableClusters
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.ListClusters(req.Context(), orgID, &parsedReq)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}

View File

@@ -414,3 +414,105 @@ func (m *module) ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inf
return resp, nil
}
func (m *module) ListClusters(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableClusters) (*inframonitoringtypes.Clusters, error) {
if err := req.Validate(); err != nil {
return nil, err
}
resp := &inframonitoringtypes.Clusters{}
if req.OrderBy == nil {
req.OrderBy = &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: inframonitoringtypes.ClustersOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionDesc,
}
}
if len(req.GroupBy) == 0 {
req.GroupBy = []qbtypes.GroupByKey{clusterNameGroupByKey}
resp.Type = inframonitoringtypes.ResponseTypeList
} else {
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, clustersTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.ClusterRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.ClusterRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getClustersTableMetadata(ctx, req)
if err != nil {
return nil, err
}
resp.Total = len(metadataMap)
pageGroups, err := m.getTopClusterGroups(ctx, orgID, req, metadataMap)
if err != nil {
return nil, err
}
if len(pageGroups) == 0 {
resp.Records = []inframonitoringtypes.ClusterRecord{}
return resp, nil
}
filterExpr := ""
if req.Filter != nil {
filterExpr = req.Filter.Expression
}
fullQueryReq := buildFullQueryRequest(req.Start, req.End, filterExpr, req.GroupBy, pageGroups, m.newClustersTableListQuery())
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
if err != nil {
return nil, err
}
// Reuse the nodes condition-counts CTE function via a temp struct — it reads only
// Start/End/Filter/GroupBy from PostableNodes. With default groupBy
// [k8s.cluster.name], counts are bucketed per cluster; with a custom groupBy,
// they aggregate across clusters in that group.
nodeConditionCountsMap, err := m.getPerGroupNodeConditionCounts(ctx, &inframonitoringtypes.PostableNodes{
Start: req.Start,
End: req.End,
Filter: req.Filter,
GroupBy: req.GroupBy,
}, pageGroups)
if err != nil {
return nil, err
}
// Same pattern for pod phase counts via PostablePods shim.
podPhaseCountsMap, err := m.getPerGroupPodPhaseCounts(ctx, &inframonitoringtypes.PostablePods{
Start: req.Start,
End: req.End,
Filter: req.Filter,
GroupBy: req.GroupBy,
}, pageGroups)
if err != nil {
return nil, err
}
resp.Records = buildClusterRecords(queryResp, pageGroups, req.GroupBy, metadataMap, nodeConditionCountsMap, podPhaseCountsMap)
resp.Warning = queryResp.Warning
return resp, nil
}

View File

@@ -13,6 +13,7 @@ type Handler interface {
ListPods(http.ResponseWriter, *http.Request)
ListNodes(http.ResponseWriter, *http.Request)
ListNamespaces(http.ResponseWriter, *http.Request)
ListClusters(http.ResponseWriter, *http.Request)
}
type Module interface {
@@ -20,4 +21,5 @@ type Module interface {
ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error)
ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error)
ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNamespaces) (*inframonitoringtypes.Namespaces, error)
ListClusters(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableClusters) (*inframonitoringtypes.Clusters, error)
}

View File

@@ -0,0 +1,110 @@
package inframonitoringtypes
import (
"encoding/json"
"slices"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
type Clusters struct {
Type ResponseType `json:"type" required:"true"`
Records []ClusterRecord `json:"records" required:"true"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}
type ClusterRecord struct {
// TODO(nikhilmantri0902): once the underlying attr key is migrated to
// k8s.cluster.uid (see clusterNameAttrKey TODO in implinframonitoring),
// surface ClusterUID alongside (or replace) ClusterName.
ClusterName string `json:"clusterName" required:"true"`
ClusterCPU float64 `json:"clusterCPU" required:"true"`
ClusterCPUAllocatable float64 `json:"clusterCPUAllocatable" required:"true"`
ClusterMemory float64 `json:"clusterMemory" required:"true"`
ClusterMemoryAllocatable float64 `json:"clusterMemoryAllocatable" required:"true"`
ReadyNodesCount int `json:"readyNodesCount" required:"true"`
NotReadyNodesCount int `json:"notReadyNodesCount" required:"true"`
PendingPodCount int `json:"pendingPodCount" required:"true"`
RunningPodCount int `json:"runningPodCount" required:"true"`
SucceededPodCount int `json:"succeededPodCount" required:"true"`
FailedPodCount int `json:"failedPodCount" required:"true"`
UnknownPodCount int `json:"unknownPodCount" required:"true"`
Meta map[string]interface{} `json:"meta" required:"true"`
}
// PostableClusters is the request body for the v2 clusters list API.
type PostableClusters struct {
Start int64 `json:"start" required:"true"`
End int64 `json:"end" required:"true"`
Filter *qbtypes.Filter `json:"filter"`
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
OrderBy *qbtypes.OrderBy `json:"orderBy"`
Offset int `json:"offset"`
Limit int `json:"limit" required:"true"`
}
// Validate ensures PostableClusters contains acceptable values.
func (req *PostableClusters) Validate() error {
if req == nil {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
if req.Start <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid start time %d: start must be greater than 0",
req.Start,
)
}
if req.End <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid end time %d: end must be greater than 0",
req.End,
)
}
if req.Start >= req.End {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid time range: start (%d) must be less than end (%d)",
req.Start,
req.End,
)
}
if req.Limit < 1 || req.Limit > 5000 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
}
if req.Offset < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
}
if req.OrderBy != nil {
if !slices.Contains(ClustersValidOrderByKeys, req.OrderBy.Key.Name) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", req.OrderBy.Key.Name)
}
if req.OrderBy.Direction != qbtypes.OrderDirectionAsc && req.OrderBy.Direction != qbtypes.OrderDirectionDesc {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", req.OrderBy.Direction)
}
}
return nil
}
// UnmarshalJSON validates input immediately after decoding.
func (req *PostableClusters) UnmarshalJSON(data []byte) error {
type raw PostableClusters
var decoded raw
if err := json.Unmarshal(data, &decoded); err != nil {
return err
}
*req = PostableClusters(decoded)
return req.Validate()
}

View File

@@ -0,0 +1,15 @@
package inframonitoringtypes
const (
ClustersOrderByCPU = "cpu"
ClustersOrderByCPUAllocatable = "cpu_allocatable"
ClustersOrderByMemory = "memory"
ClustersOrderByMemoryAllocatable = "memory_allocatable"
)
var ClustersValidOrderByKeys = []string{
ClustersOrderByCPU,
ClustersOrderByCPUAllocatable,
ClustersOrderByMemory,
ClustersOrderByMemoryAllocatable,
}

View File

@@ -0,0 +1,291 @@
package inframonitoringtypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/require"
)
func TestPostableClusters_Validate(t *testing.T) {
tests := []struct {
name string
req *PostableClusters
wantErr bool
}{
{
name: "valid request",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "nil request",
req: nil,
wantErr: true,
},
{
name: "start time zero",
req: &PostableClusters{
Start: 0,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time negative",
req: &PostableClusters{
Start: -1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "end time zero",
req: &PostableClusters{
Start: 1000,
End: 0,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time greater than end time",
req: &PostableClusters{
Start: 2000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time equal to end time",
req: &PostableClusters{
Start: 1000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "limit zero",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 0,
Offset: 0,
},
wantErr: true,
},
{
name: "limit negative",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: -10,
Offset: 0,
},
wantErr: true,
},
{
name: "limit exceeds max",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 5001,
Offset: 0,
},
wantErr: true,
},
{
name: "offset negative",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: -5,
},
wantErr: true,
},
{
name: "orderBy nil is valid",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "orderBy with valid key cpu and direction asc",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: ClustersOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionAsc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key cpu_allocatable and direction desc",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: ClustersOrderByCPUAllocatable,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key memory and direction desc",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: ClustersOrderByMemory,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key memory_allocatable and direction asc",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: ClustersOrderByMemoryAllocatable,
},
},
Direction: qbtypes.OrderDirectionAsc,
},
},
wantErr: false,
},
{
name: "orderBy with condition key is rejected",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "condition",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with pod_phase key is rejected",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "pod_phase",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with invalid key",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "unknown",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with valid key but invalid direction",
req: &PostableClusters{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: ClustersOrderByMemory,
},
},
Direction: qbtypes.OrderDirection{String: valuer.NewString("invalid")},
},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if tt.wantErr {
require.Error(t, err)
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
} else {
require.NoError(t, err)
}
})
}
}