Compare commits

..

3 Commits

Author SHA1 Message Date
nikhilmantri0902
885e29cd7b refactor(inframonitoring): drop required-metrics test, add test hosts_warnings 2026-06-19 03:04:47 +05:30
nikhilmantri0902
f182ee0c49 chore: metric_name required checks removed 2026-06-18 21:40:36 +05:30
Srikanth Chekuri
2cf7ef93ea chore: send warning instead of error for unseen metrics and missing (… (#11754)
Some checks failed
build-staging / prepare (push) Has been cancelled
build-staging / js-build (push) Has been cancelled
build-staging / go-build (push) Has been cancelled
build-staging / staging (push) Has been cancelled
Release Drafter / update_release_draft (push) Has been cancelled
* chore: send warning instead of error for unseen metrics and missing (metric, key)

* chore: update integration test

* chore: fix integration test

* chore: fix test

* chore: add unit test for missing key
2026-06-18 10:42:09 +00:00
40 changed files with 322 additions and 1456 deletions

View File

@@ -3938,8 +3938,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesClusterRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -3950,7 +3948,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesDaemonSetRecord:
@@ -4007,8 +4004,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesDaemonSetRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4019,7 +4014,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesDeploymentRecord:
@@ -4076,8 +4070,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesDeploymentRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4088,7 +4080,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesHostFilter:
@@ -4154,8 +4145,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesHostRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4166,7 +4155,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesJobRecord:
@@ -4229,8 +4217,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesJobRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4241,7 +4227,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesNamespaceRecord:
@@ -4276,8 +4261,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesNamespaceRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4288,7 +4271,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesNodeCondition:
@@ -4353,8 +4335,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesNodeRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4365,7 +4345,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesPodCountsByPhase:
@@ -4451,8 +4430,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesPodRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4463,7 +4440,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesPostableClusters:
@@ -4726,16 +4702,6 @@ components:
- end
- limit
type: object
InframonitoringtypesRequiredMetricsCheck:
properties:
missingMetrics:
items:
type: string
nullable: true
type: array
required:
- missingMetrics
type: object
InframonitoringtypesResponseType:
enum:
- list
@@ -4795,8 +4761,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesStatefulSetRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4807,7 +4771,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesVolumeRecord:
@@ -4855,8 +4818,6 @@ components:
items:
$ref: '#/components/schemas/InframonitoringtypesVolumeRecord'
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
@@ -4867,7 +4828,6 @@ components:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
LlmpricingruletypesGettablePricingRules:
@@ -14695,10 +14655,10 @@ paths:
for custom groupBy keys; in both modes every row aggregates nodes and pods
in the group. Supports filtering via a filter expression, custom groupBy,
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
via offset/limit. Also reports missing required metrics and whether the requested
time range falls before the data retention boundary. Numeric metric fields
(clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable)
return -1 as a sentinel when no data is available for that field.'
via offset/limit. Also reports whether the requested time range falls before
the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable,
clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data
is available for that field.'
operationId: ListClusters
requestBody:
content:
@@ -14771,11 +14731,11 @@ paths:
row aggregates pods owned by daemonsets in the group. Supports filtering via
a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit
/ memory / memory_request / memory_limit / desired_nodes / current_nodes,
and pagination via offset/limit. Also reports missing required metrics and
whether the requested time range falls before the data retention boundary.
Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit,
daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes,
currentNodes) return -1 as a sentinel when no data is available for that field.'
and pagination via offset/limit. Also reports whether the requested time range
falls before the data retention boundary. Numeric metric fields (daemonSetCPU,
daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest,
daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel
when no data is available for that field.'
operationId: ListDaemonSets
requestBody:
content:
@@ -14846,11 +14806,11 @@ paths:
group. Supports filtering via a filter expression, custom groupBy, ordering
by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit
/ desired_pods / available_pods, and pagination via offset/limit. Also reports
missing required metrics and whether the requested time range falls before
the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest,
deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit,
desiredPods, availablePods) return -1 as a sentinel when no data is available
for that field.'
whether the requested time range falls before the data retention boundary.
Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit,
deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods,
availablePods) return -1 as a sentinel when no data is available for that
field.'
operationId: ListDeployments
requestBody:
content:
@@ -14915,10 +14875,9 @@ paths:
custom groupBy to aggregate hosts by any attribute, ordering by any of the
five metrics, and pagination via offset/limit. The response type is ''list''
for the default host.name grouping or ''grouped_list'' for custom groupBy
keys. Also reports missing required metrics and whether the requested time
range falls before the data retention boundary. Numeric metric fields (cpu,
memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available
for that field.'
keys. Also reports whether the requested time range falls before the data
retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage)
return -1 as a sentinel when no data is available for that field.'
operationId: ListHosts
requestBody:
content:
@@ -14992,11 +14951,11 @@ paths:
jobs in the group. Supports filtering via a filter expression, custom groupBy,
ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit
/ desired_successful_pods / active_pods / failed_pods / successful_pods, and
pagination via offset/limit. Also reports missing required metrics and whether
the requested time range falls before the data retention boundary. Numeric
metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest,
jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods)
return -1 as a sentinel when no data is available for that field.'
pagination via offset/limit. Also reports whether the requested time range
falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest,
jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods,
activePods, failedPods, successfulPods) return -1 as a sentinel when no data
is available for that field.'
operationId: ListJobs
requestBody:
content:
@@ -15061,10 +15020,10 @@ paths:
type is ''list'' for the default k8s.namespace.name grouping or ''grouped_list''
for custom groupBy keys; in both modes every row aggregates pods in the group.
Supports filtering via a filter expression, custom groupBy, ordering by cpu
/ memory, and pagination via offset/limit. Also reports missing required metrics
and whether the requested time range falls before the data retention boundary.
Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel
when no data is available for that field.'
/ memory, and pagination via offset/limit. Also reports whether the requested
time range falls before the data retention boundary. Numeric metric fields
(namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available
for that field.'
operationId: ListNamespaces
requestBody:
content:
@@ -15132,10 +15091,10 @@ paths:
for custom groupBy keys (each row aggregates nodes in the group; condition
stays no_data). Supports filtering via a filter expression, custom groupBy,
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
via offset/limit. Also reports missing required metrics and whether the requested
time range falls before the data retention boundary. Numeric metric fields
(nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1
as a sentinel when no data is available for that field.'
via offset/limit. Also reports whether the requested time range falls before
the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable,
nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is
available for that field.'
operationId: ListNodes
requestBody:
content:
@@ -15204,11 +15163,10 @@ paths:
is one pod with its current phase) or ''grouped_list'' for custom groupBy
keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase:
{ pending, running, succeeded, failed, unknown } derived from each pod''s
latest phase in the window). Also reports missing required metrics and whether
the requested time range falls before the data retention boundary. Numeric
metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest,
podMemoryLimit, podAge) return -1 as a sentinel when no data is available
for that field.'
latest phase in the window). Also reports whether the requested time range
falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest,
podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1
as a sentinel when no data is available for that field.'
operationId: ListPods
requestBody:
content:
@@ -15275,11 +15233,10 @@ paths:
usage, inodes, inodes_free, inodes_used), and pagination via offset/limit.
The response type is ''list'' for the default k8s.persistentvolumeclaim.name
grouping or ''grouped_list'' for custom groupBy keys; in both modes every
row aggregates volumes in the group. Also reports missing required metrics
and whether the requested time range falls before the data retention boundary.
Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes,
volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is
available for that field.'
row aggregates volumes in the group. Also reports whether the requested time
range falls before the data retention boundary. Numeric metric fields (volumeAvailable,
volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed)
return -1 as a sentinel when no data is available for that field.'
operationId: ListVolumes
requestBody:
content:
@@ -15350,11 +15307,10 @@ paths:
statefulsets in the group. Supports filtering via a filter expression, custom
groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request
/ memory_limit / desired_pods / current_pods, and pagination via offset/limit.
Also reports missing required metrics and whether the requested time range
falls before the data retention boundary. Numeric metric fields (statefulSetCPU,
statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest,
statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel
when no data is available for that field.'
Also reports whether the requested time range falls before the data retention
boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit,
statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods,
currentPods) return -1 as a sentinel when no data is available for that field.'
operationId: ListStatefulSets
requestBody:
content:

View File

@@ -39,7 +39,7 @@ import { GeneratedAPIInstance } from '../../../generatedAPIInstance';
import type { ErrorType, BodyType } from '../../../generatedAPIInstance';
/**
* Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* @summary List Clusters for Infra Monitoring
*/
export const listClusters = (
@@ -122,7 +122,7 @@ export const useListClusters = <
return useMutation(getListClustersMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.
* @summary List DaemonSets for Infra Monitoring
*/
export const listDaemonSets = (
@@ -205,7 +205,7 @@ export const useListDaemonSets = <
return useMutation(getListDaemonSetsMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.
* @summary List Deployments for Infra Monitoring
*/
export const listDeployments = (
@@ -288,7 +288,7 @@ export const useListDeployments = <
return useMutation(getListDeploymentsMutationOptions(options));
};
/**
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
* @summary List Hosts for Infra Monitoring
*/
export const listHosts = (
@@ -371,7 +371,7 @@ export const useListHosts = <
return useMutation(getListHostsMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.
* @summary List Jobs for Infra Monitoring
*/
export const listJobs = (
@@ -454,7 +454,7 @@ export const useListJobs = <
return useMutation(getListJobsMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.
* @summary List Namespaces for Infra Monitoring
*/
export const listNamespaces = (
@@ -537,7 +537,7 @@ export const useListNamespaces = <
return useMutation(getListNamespacesMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* @summary List Nodes for Infra Monitoring
*/
export const listNodes = (
@@ -620,7 +620,7 @@ export const useListNodes = <
return useMutation(getListNodesMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
* @summary List Pods for Infra Monitoring
*/
export const listPods = (
@@ -703,7 +703,7 @@ export const useListPods = <
return useMutation(getListPodsMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.
* @summary List Volumes for Infra Monitoring
*/
export const listVolumes = (
@@ -786,7 +786,7 @@ export const useListVolumes = <
return useMutation(getListVolumesMutationOptions(options));
};
/**
* Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.
* Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.
* @summary List StatefulSets for Infra Monitoring
*/
export const listStatefulSets = (

View File

@@ -5423,13 +5423,6 @@ export interface InframonitoringtypesClusterRecordDTO {
podCountsByPhase: InframonitoringtypesPodCountsByPhaseDTO;
}
export interface InframonitoringtypesRequiredMetricsCheckDTO {
/**
* @type array,null
*/
missingMetrics: string[] | null;
}
export enum InframonitoringtypesResponseTypeDTO {
list = 'list',
grouped_list = 'grouped_list',
@@ -5465,7 +5458,6 @@ export interface InframonitoringtypesClustersDTO {
* @type array
*/
records: InframonitoringtypesClusterRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5543,7 +5535,6 @@ export interface InframonitoringtypesDaemonSetsDTO {
* @type array
*/
records: InframonitoringtypesDaemonSetRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5621,7 +5612,6 @@ export interface InframonitoringtypesDeploymentsDTO {
* @type array
*/
records: InframonitoringtypesDeploymentRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5707,7 +5697,6 @@ export interface InframonitoringtypesHostsDTO {
* @type array
*/
records: InframonitoringtypesHostRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5793,7 +5782,6 @@ export interface InframonitoringtypesJobsDTO {
* @type array
*/
records: InframonitoringtypesJobRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5843,7 +5831,6 @@ export interface InframonitoringtypesNamespacesDTO {
* @type array
*/
records: InframonitoringtypesNamespaceRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5910,7 +5897,6 @@ export interface InframonitoringtypesNodesDTO {
* @type array
*/
records: InframonitoringtypesNodeRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -5994,7 +5980,6 @@ export interface InframonitoringtypesPodsDTO {
* @type array
*/
records: InframonitoringtypesPodRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -6342,7 +6327,6 @@ export interface InframonitoringtypesStatefulSetsDTO {
* @type array
*/
records: InframonitoringtypesStatefulSetRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
@@ -6411,7 +6395,6 @@ export interface InframonitoringtypesVolumesDTO {
* @type array
*/
records: InframonitoringtypesVolumeRecordDTO[];
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/

View File

@@ -20,7 +20,7 @@ import APIError from 'types/api/error';
import DashboardActions from './DashboardActions/DashboardActions';
import DashboardInfo from './DashboardInfo/DashboardInfo';
import { useEditableTitle } from './DashboardInfo/useEditableTitle';
import VariablesBar from '../VariablesBar/VariablesBar';
import { usePublicDashboardMeta } from '../DashboardSettings/PublicDashboard/usePublicDashboardMeta';
import styles from './DashboardPageToolbar.module.scss';
@@ -53,6 +53,10 @@ function DashboardPageToolbar(props: DashboardPageToolbarProps): JSX.Element {
(s) => s.setIsPanelTypeSelectionModalOpen,
);
// Single global fetch of the public-sharing meta (the drawer reuses this cache);
// drives the public-access badge.
const { isPublic: isPublicDashboard } = usePublicDashboardMeta(id);
const isAuthor =
!!user?.email && !!dashboard.createdBy && dashboard.createdBy === user.email;
@@ -118,7 +122,7 @@ function DashboardPageToolbar(props: DashboardPageToolbarProps): JSX.Element {
image={image}
tags={tags}
description={description}
isPublicDashboard={false}
isPublicDashboard={isPublicDashboard}
isDashboardLocked={isDashboardLocked}
isEditing={isEditing}
draft={draft}
@@ -138,8 +142,6 @@ function DashboardPageToolbar(props: DashboardPageToolbarProps): JSX.Element {
onOpenRename={startEdit}
/>
</div>
<VariablesBar dashboard={dashboard} />
</section>
);
}

View File

@@ -1,4 +1,3 @@
import { sortBy } from 'lodash-es';
import type { VariableDefaultValueDTO } from 'api/generated/services/sigNoz.schemas';
/**
@@ -77,26 +76,3 @@ export function emptyVariableFormModel(): VariableFormModel {
dynamicSignal: 'traces',
};
}
/** Maps the dynamic-variable signal to the field-values API signal. */
export function signalForApi(
signal: TelemetrySignal,
): TelemetrySignal | undefined {
return signal;
}
type SortableValues = (string | number | boolean)[];
/** Sorts option/preview values by the variable's chosen order (no-op when disabled). */
export function sortValuesByOrder(
values: SortableValues,
sort: VariableSort,
): SortableValues {
if (sort === 'ASC') {
return sortBy(values);
}
if (sort === 'DESC') {
return sortBy(values).reverse();
}
return values;
}

View File

@@ -1,114 +0,0 @@
import { useMemo } from 'react';
import { SolidInfoCircle } from '@signozhq/icons';
import { Typography } from '@signozhq/ui/typography';
// eslint-disable-next-line signoz/no-antd-components -- lightweight description tooltip, matches V1
import { Tooltip } from 'antd';
import { commaValuesParser } from 'lib/dashboardVariables/customCommaValuesParser';
import { sortValuesByOrder } from '../DashboardSettings/Variables/variableModel';
import type { VariableFormModel } from '../DashboardSettings/Variables/variableModel';
import type { VariableSelection, VariableSelectionMap } from './selectionTypes';
import DynamicSelector from './selectors/DynamicSelector';
import QuerySelector from './selectors/QuerySelector';
import TextSelector from './selectors/TextSelector';
import ValueSelector from './selectors/ValueSelector';
import styles from './VariablesBar.module.scss';
interface VariableSelectorProps {
variable: VariableFormModel;
/** All variables (Dynamic uses them to scope options by sibling selections). */
variables: VariableFormModel[];
/** Names this variable depends on (for Query gating). */
parents: string[];
/** All current selections (Query passes them as the request payload). */
selections: VariableSelectionMap;
selection: VariableSelection;
onChange: (selection: VariableSelection) => void;
}
/** One labelled variable control; dispatches on the variable type. */
function VariableSelector({
variable,
variables,
parents,
selections,
selection,
onChange,
}: VariableSelectorProps): JSX.Element {
const customOptions = useMemo(
() =>
variable.type === 'CUSTOM'
? sortValuesByOrder(
commaValuesParser(variable.customValue),
variable.sort,
).map(String)
: [],
[variable],
);
const renderControl = (): JSX.Element => {
switch (variable.type) {
case 'TEXT':
return (
<TextSelector
selection={selection}
defaultValue={variable.textValue}
onChange={onChange}
testId={`variable-input-${variable.name}`}
/>
);
case 'QUERY':
return (
<QuerySelector
variable={variable}
parents={parents}
selections={selections}
selection={selection}
onChange={onChange}
/>
);
case 'DYNAMIC':
return (
<DynamicSelector
variable={variable}
variables={variables}
selections={selections}
selection={selection}
onChange={onChange}
/>
);
case 'CUSTOM':
default:
return (
<ValueSelector
options={customOptions}
multiSelect={variable.multiSelect}
showAllOption={variable.showAllOption}
selection={selection}
onChange={onChange}
testId={`variable-select-${variable.name}`}
/>
);
}
};
return (
<div
className={styles.variableItem}
data-testid={`variable-${variable.name}`}
>
<Typography.Text className={styles.variableName}>
${variable.name}
{variable.description ? (
<Tooltip title={variable.description}>
<SolidInfoCircle className={styles.infoIcon} size="md" />
</Tooltip>
) : null}
</Typography.Text>
<div className={styles.variableValue}>{renderControl()}</div>
</div>
);
}
export default VariableSelector;

View File

@@ -1,71 +0,0 @@
/* Mirrors the V1 dashboard variable bar: each variable is a connected pill —
a robin `$name` segment joined to a value segment. */
/* Sits inside the already-padded sticky toolbar section, so it only needs a top
gap from the tags — horizontal/bottom padding comes from the toolbar. */
.bar {
display: flex;
flex-wrap: wrap;
gap: 12px;
padding-top: 12px;
}
.variableItem {
display: flex;
align-items: center;
}
.variableName {
display: flex;
min-width: 56px;
height: 32px;
align-items: center;
gap: 4px;
padding: 6px 6px 6px 8px;
border: 1px solid var(--l1-border);
border-radius: 2px 0 0 2px;
background: var(--l3-background);
color: var(--bg-robin-300);
font-family: Inter;
font-size: 12px;
font-weight: 400;
line-height: 16px;
white-space: nowrap;
}
.infoIcon {
margin-left: 4px;
color: var(--l2-foreground);
}
.variableValue {
display: flex;
min-width: 120px;
height: 32px;
align-items: center;
border: 1px solid var(--l1-border);
border-left: none;
border-radius: 0 2px 2px 0;
background: var(--l2-background);
color: var(--l2-foreground);
font-size: 12px;
&:hover,
&:focus-within {
outline: 1px solid var(--bg-robin-400);
}
}
/* Inner control fills the value segment; the segment provides the frame, so the
control itself is borderless/transparent. */
.control {
width: 100%;
min-width: 120px;
:global(.ant-select-selector),
:global(.ant-input),
&:global(.ant-input) {
border: none !important;
background: transparent !important;
box-shadow: none !important;
}
}

View File

@@ -1,45 +0,0 @@
import type { DashboardtypesGettableDashboardV2DTO } from 'api/generated/services/sigNoz.schemas';
import { useVariableSelection } from './useVariableSelection';
import VariableSelector from './VariableSelector';
import styles from './VariablesBar.module.scss';
interface VariablesBarProps {
dashboard: DashboardtypesGettableDashboardV2DTO;
}
/**
* Runtime variable selector bar shown above the panels. Renders one control per
* dashboard variable; selections live in the store + URL (never the spec).
*/
function VariablesBar({ dashboard }: VariablesBarProps): JSX.Element | null {
const { variables, dependencyData, selection, setSelection } =
useVariableSelection(dashboard);
if (variables.length === 0) {
return null;
}
return (
<div className={styles.bar} data-testid="dashboard-variables-bar">
{variables.map((variable) => (
<VariableSelector
key={variable.name}
variable={variable}
variables={variables}
parents={dependencyData.parentGraph[variable.name] ?? []}
selections={selection}
selection={
selection[variable.name] ?? {
value: variable.multiSelect ? [] : '',
allSelected: false,
}
}
onChange={(next): void => setSelection(variable.name, next)}
/>
))}
</div>
);
}
export default VariablesBar;

View File

@@ -1,56 +0,0 @@
import type { VariableFormModel } from '../DashboardSettings/Variables/variableModel';
import type { VariableSelectionMap } from './selectionTypes';
function formatQueryValue(val: string): string {
const num = Number(val);
if (!Number.isNaN(num) && Number.isFinite(num)) {
return val;
}
return `'${val.replace(/'/g, "\\'")}'`;
}
function buildQueryPart(attribute: string, values: string[]): string {
const formatted = values.map(formatQueryValue);
if (formatted.length === 1) {
return `${attribute} = ${formatted[0]}`;
}
return `${attribute} IN [${formatted.join(', ')}]`;
}
/**
* Builds a filter expression from the OTHER dynamic variables' current
* selections (e.g. `k8s.namespace.name IN ['prod'] AND service = 'api'`), so a
* dynamic variable's option list is scoped by its sibling selections. Variables
* in the ALL state, with no selection, or non-dynamic are skipped. Ported from
* the V1 dynamic-variable runtime.
*/
export function buildExistingDynamicVariableQuery(
variables: VariableFormModel[],
selections: VariableSelectionMap,
currentName: string,
): string {
const parts: string[] = [];
variables.forEach((variable) => {
if (
variable.name === currentName ||
variable.type !== 'DYNAMIC' ||
!variable.dynamicAttribute
) {
return;
}
const selection = selections[variable.name];
if (!selection || selection.allSelected) {
return;
}
const raw = Array.isArray(selection.value)
? selection.value
: [selection.value];
const valid = raw
.filter((v) => v !== null && v !== undefined && v !== '')
.map((v) => String(v));
if (valid.length > 0) {
parts.push(buildQueryPart(variable.dynamicAttribute, valid));
}
});
return parts.join(' AND ');
}

View File

@@ -1,16 +0,0 @@
/** A user-selected variable value at runtime (not persisted to the spec). */
export type SelectedVariableValue =
| string
| number
| boolean
| (string | number | boolean)[]
| null;
export interface VariableSelection {
value: SelectedVariableValue;
/** True when every option is selected ("ALL"); for dynamic vars value may be null. */
allSelected: boolean;
}
/** Selected values for a dashboard's variables, keyed by variable name. */
export type VariableSelectionMap = Record<string, VariableSelection>;

View File

@@ -1,31 +0,0 @@
import type {
SelectedVariableValue,
VariableSelection,
VariableSelectionMap,
} from './selectionTypes';
/** A selection counts as resolved (usable as a parent value) when it's non-empty. */
export function isResolved(selection?: VariableSelection): boolean {
if (!selection) {
return false;
}
if (selection.allSelected) {
return true;
}
const { value } = selection;
if (Array.isArray(value)) {
return value.length > 0;
}
return value !== '' && value !== null && value !== undefined;
}
/** Flatten the selection map into the `{ name: value }` payload a query expects. */
export function selectionToPayload(
selection: VariableSelectionMap,
): Record<string, SelectedVariableValue> {
const payload: Record<string, SelectedVariableValue> = {};
Object.entries(selection).forEach(([name, sel]) => {
payload[name] = sel.value;
});
return payload;
}

View File

@@ -1,82 +0,0 @@
import { useMemo } from 'react';
// eslint-disable-next-line no-restricted-imports
import { useSelector } from 'react-redux';
import { useGetFieldValues } from 'hooks/dynamicVariables/useGetFieldValues';
import type { AppState } from 'store/reducers';
import type { GlobalReducer } from 'types/reducer/globalTime';
import {
signalForApi,
sortValuesByOrder,
} from '../../DashboardSettings/Variables/variableModel';
import type { VariableFormModel } from '../../DashboardSettings/Variables/variableModel';
import { buildExistingDynamicVariableQuery } from '../dynamicFilter';
import type {
VariableSelection,
VariableSelectionMap,
} from '../selectionTypes';
import { useAutoSelect } from '../useAutoSelect';
import ValueSelector from './ValueSelector';
interface DynamicSelectorProps {
variable: VariableFormModel;
/** All variables + current selections, to scope options by sibling dynamics. */
variables: VariableFormModel[];
selections: VariableSelectionMap;
selection: VariableSelection;
onChange: (selection: VariableSelection) => void;
}
/**
* Dynamic-variable options sourced from live telemetry field values for the
* chosen signal + attribute, scoped by the other dynamic variables' selections
* (so e.g. `pod` narrows to the chosen `namespace`).
*/
function DynamicSelector({
variable,
variables,
selections,
selection,
onChange,
}: DynamicSelectorProps): JSX.Element {
const { minTime, maxTime } = useSelector<AppState, GlobalReducer>(
(state) => state.globalTime,
);
const existingQuery = useMemo(
() => buildExistingDynamicVariableQuery(variables, selections, variable.name),
[variables, selections, variable.name],
);
const { data, isFetching } = useGetFieldValues({
signal: signalForApi(variable.dynamicSignal),
name: variable.dynamicAttribute,
startUnixMilli: minTime,
endUnixMilli: maxTime,
existingQuery: existingQuery || undefined,
enabled: !!variable.dynamicAttribute,
});
const options = useMemo(() => {
const payload = data?.data;
const values =
payload?.normalizedValues ?? payload?.values?.StringValues ?? [];
return sortValuesByOrder(values, variable.sort).map(String);
}, [data, variable.sort]);
useAutoSelect(variable, options, selection, onChange);
return (
<ValueSelector
options={options}
multiSelect={variable.multiSelect}
showAllOption={variable.showAllOption}
loading={isFetching}
selection={selection}
onChange={onChange}
testId={`variable-select-${variable.name}`}
/>
);
}
export default DynamicSelector;

View File

@@ -1,90 +0,0 @@
import { useMemo } from 'react';
import { useQuery } from 'react-query';
// eslint-disable-next-line no-restricted-imports
import { useSelector } from 'react-redux';
import dashboardVariablesQuery from 'api/dashboard/variables/dashboardVariablesQuery';
import type { AppState } from 'store/reducers';
import type { GlobalReducer } from 'types/reducer/globalTime';
import { sortValuesByOrder } from '../../DashboardSettings/Variables/variableModel';
import type { VariableFormModel } from '../../DashboardSettings/Variables/variableModel';
import type {
VariableSelection,
VariableSelectionMap,
} from '../selectionTypes';
import { isResolved, selectionToPayload } from '../selectionUtils';
import { useAutoSelect } from '../useAutoSelect';
import ValueSelector from './ValueSelector';
interface QuerySelectorProps {
variable: VariableFormModel;
/** Names this variable's query references; it waits until they're resolved. */
parents: string[];
/** All current selections, fed to the query as `{ name: value }`. */
selections: VariableSelectionMap;
selection: VariableSelection;
onChange: (selection: VariableSelection) => void;
}
/**
* Query-driven options. Dependency orchestration is declarative: the query is
* `enabled` only once every parent is resolved, and the parent values are in the
* query key — so it refetches automatically when a parent changes (and a cyclic
* dependency is simply never enabled).
*/
function QuerySelector({
variable,
parents,
selections,
selection,
onChange,
}: QuerySelectorProps): JSX.Element {
const { minTime, maxTime } = useSelector<AppState, GlobalReducer>(
(state) => state.globalTime,
);
const payload = useMemo(() => selectionToPayload(selections), [selections]);
const enabled = parents.every((parent) => isResolved(selections[parent]));
const { data, isFetching } = useQuery(
[
'dashboard-variable',
variable.name,
variable.queryValue,
payload,
minTime,
maxTime,
],
() =>
dashboardVariablesQuery({
query: variable.queryValue,
variables: payload,
}),
{ enabled, refetchOnWindowFocus: false },
);
const options = useMemo(() => {
if (!data || data.statusCode !== 200 || !data.payload) {
return [] as string[];
}
return sortValuesByOrder(
data.payload.variableValues ?? [],
variable.sort,
).map(String);
}, [data, variable.sort]);
useAutoSelect(variable, options, selection, onChange);
return (
<ValueSelector
options={options}
multiSelect={variable.multiSelect}
showAllOption={variable.showAllOption}
loading={isFetching}
selection={selection}
onChange={onChange}
testId={`variable-select-${variable.name}`}
/>
);
}
export default QuerySelector;

View File

@@ -1,70 +0,0 @@
import { useCallback, useRef, useState } from 'react';
import type { InputRef } from 'antd';
// eslint-disable-next-line signoz/no-antd-components -- match V1 textbox behaviour (commit on blur/Enter, borderless)
import { Input } from 'antd';
import type { VariableSelection } from '../selectionTypes';
import styles from '../VariablesBar.module.scss';
interface TextSelectorProps {
selection: VariableSelection;
/** Configured default; an emptied input falls back to it (V1 behaviour). */
defaultValue?: string;
onChange: (selection: VariableSelection) => void;
testId?: string;
}
/**
* Free-text variable input. Mirrors V1: edits are local and only committed on
* blur / Enter (not per keystroke), and clearing the field restores the default.
*/
function TextSelector({
selection,
defaultValue,
onChange,
testId,
}: TextSelectorProps): JSX.Element {
const inputRef = useRef<InputRef>(null);
const [value, setValue] = useState<string>(
typeof selection.value === 'string' ? selection.value : (defaultValue ?? ''),
);
const commit = useCallback(
(next: string): void => onChange({ value: next, allSelected: false }),
[onChange],
);
const handleBlur = useCallback(
(event: React.FocusEvent<HTMLInputElement>): void => {
const trimmed = event.target.value.trim();
if (!trimmed && defaultValue) {
setValue(defaultValue);
commit(defaultValue);
} else {
commit(trimmed);
}
},
[commit, defaultValue],
);
return (
<Input
ref={inputRef}
className={styles.control}
bordered={false}
placeholder="Enter value"
value={value}
title={value}
onChange={(e): void => setValue(e.target.value)}
onBlur={handleBlur}
onKeyDown={(e): void => {
if (e.key === 'Enter') {
inputRef.current?.blur();
}
}}
data-testid={testId}
/>
);
}
export default TextSelector;

View File

@@ -1,94 +0,0 @@
import { useMemo } from 'react';
import { CustomMultiSelect, CustomSelect } from 'components/NewSelect';
import type { OptionData } from 'components/NewSelect/types';
import { ALL_SELECT_VALUE } from 'container/DashboardContainer/utils';
import type { VariableSelection } from '../selectionTypes';
import styles from '../VariablesBar.module.scss';
interface ValueSelectorProps {
options: string[];
multiSelect: boolean;
showAllOption: boolean;
loading?: boolean;
selection: VariableSelection;
onChange: (selection: VariableSelection) => void;
testId?: string;
}
/**
* Single/multi value picker for Custom/Query/Dynamic variables. Reuses the
* shared NewSelect components, which provide search, the "ALL" option and
* apply-on-close batching (so multi-select edits don't cascade per toggle).
*/
function ValueSelector({
options,
multiSelect,
showAllOption,
loading,
selection,
onChange,
testId,
}: ValueSelectorProps): JSX.Element {
const optionData = useMemo<OptionData[]>(
() => options.map((option) => ({ label: option, value: option })),
[options],
);
if (multiSelect) {
const value = selection.allSelected
? ALL_SELECT_VALUE
: (Array.isArray(selection.value) ? selection.value : []).map(String);
return (
<CustomMultiSelect
className={styles.control}
data-testid={testId}
options={optionData}
value={value}
loading={loading}
showSearch
placeholder="Select value"
enableAllSelection={showAllOption}
onChange={(next): void => {
const values = Array.isArray(next)
? next.map(String)
: next
? [String(next)]
: [];
if (values.length === 0) {
onChange({ value: [], allSelected: false });
return;
}
// CustomMultiSelect emits the full value set when ALL is picked.
const isAll =
showAllOption &&
options.length > 0 &&
options.every((option) => values.includes(option));
onChange({ value: values, allSelected: isAll });
}}
onClear={(): void => onChange({ value: [], allSelected: false })}
/>
);
}
return (
<CustomSelect
className={styles.select}
data-testid={testId}
options={optionData}
value={
selection.value == null || Array.isArray(selection.value)
? undefined
: String(selection.value)
}
loading={loading}
showSearch
placeholder="Select value"
onChange={(next): void =>
onChange({ value: next == null ? '' : String(next), allSelected: false })
}
/>
);
}
export default ValueSelector;

View File

@@ -1,41 +0,0 @@
import { useEffect } from 'react';
import type { VariableFormModel } from '../DashboardSettings/Variables/variableModel';
import type { VariableSelection } from './selectionTypes';
/**
* When fetched options arrive and the current selection isn't one of them,
* auto-pick the variable's default (if present in the options) or the first
* option — so dependent children always have a usable parent value.
*/
export function useAutoSelect(
variable: VariableFormModel,
options: string[],
selection: VariableSelection,
onChange: (selection: VariableSelection) => void,
): void {
useEffect(() => {
if (options.length === 0 || selection.allSelected) {
return;
}
const current = selection.value;
const isValid = Array.isArray(current)
? current.length > 0 && current.every((c) => options.includes(String(c)))
: current !== '' &&
current !== null &&
current !== undefined &&
options.includes(String(current));
if (isValid) {
return;
}
const fallback = (variable.defaultValue as { value?: string } | undefined)
?.value;
const initial =
fallback && options.includes(fallback) ? fallback : options[0];
onChange({
value: variable.multiSelect ? [initial] : initial,
allSelected: false,
});
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [options]);
}

View File

@@ -1,116 +0,0 @@
import { useCallback, useEffect, useMemo } from 'react';
import { parseAsJson, useQueryState } from 'nuqs';
import type { DashboardtypesGettableDashboardV2DTO } from 'api/generated/services/sigNoz.schemas';
import { dtoToFormModel } from '../DashboardSettings/Variables/variableAdapters';
import type { VariableFormModel } from '../DashboardSettings/Variables/variableModel';
import { selectVariableValues } from '../store/slices/variableSelectionSlice';
import { useDashboardStore } from '../store/useDashboardStore';
import type {
SelectedVariableValue,
VariableSelection,
VariableSelectionMap,
} from './selectionTypes';
import {
computeVariableDependencies,
type VariableDependencyData,
} from './variableDependencies';
/** URL sentinel for an "ALL values selected" state (matches V1). */
export const ALL_SELECTED = '__ALL__';
/** `?variables=` holds `{ [name]: value }` (ALL encoded as the sentinel). */
const variablesUrlParser = parseAsJson<Record<string, SelectedVariableValue>>(
(v) =>
typeof v === 'object' && v !== null
? (v as Record<string, SelectedVariableValue>)
: null,
);
function defaultSelection(model: VariableFormModel): VariableSelection {
const def = (
model.defaultValue as { value?: SelectedVariableValue } | undefined
)?.value;
if (def !== undefined && def !== null && def !== '') {
return { value: def, allSelected: false };
}
return { value: model.multiSelect ? [] : '', allSelected: false };
}
function fromUrlValue(raw: SelectedVariableValue): VariableSelection {
return raw === ALL_SELECTED
? { value: null, allSelected: true }
: { value: raw, allSelected: false };
}
interface UseVariableSelection {
variables: VariableFormModel[];
dependencyData: VariableDependencyData;
selection: VariableSelectionMap;
setSelection: (name: string, selection: VariableSelection) => void;
}
/**
* Runtime variable selection: derives the variable list from the spec, seeds
* each value from URL → localStorage(store) → default, and persists changes to
* both the store and the URL. Never writes to the dashboard spec.
*/
export function useVariableSelection(
dashboard: DashboardtypesGettableDashboardV2DTO,
): UseVariableSelection {
const dashboardId = dashboard.id ?? '';
const variables = useMemo(
() => (dashboard.spec?.variables ?? []).map(dtoToFormModel),
[dashboard.spec?.variables],
);
const dependencyData = useMemo(
() => computeVariableDependencies(variables),
[variables],
);
const selection = useDashboardStore(selectVariableValues(dashboardId));
const setVariableValue = useDashboardStore((s) => s.setVariableValue);
const setVariableValues = useDashboardStore((s) => s.setVariableValues);
const [urlValues, setUrlValues] = useQueryState(
'variables',
variablesUrlParser.withOptions({ history: 'replace' }),
);
// Seed selections for this dashboard: URL wins, then persisted store, then default.
useEffect(() => {
if (!dashboardId || variables.length === 0) {
return;
}
// `selection` here is the persisted (localStorage) map on mount — the
// effect deliberately doesn't depend on it, so seeding runs once per set.
const stored = selection;
const seeded: VariableSelectionMap = {};
variables.forEach((variable) => {
const urlValue = urlValues?.[variable.name];
if (urlValue !== undefined) {
seeded[variable.name] = fromUrlValue(urlValue);
} else if (stored[variable.name]) {
seeded[variable.name] = stored[variable.name];
} else {
seeded[variable.name] = defaultSelection(variable);
}
});
setVariableValues(dashboardId, seeded);
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [dashboardId, variables]);
const setSelection = useCallback(
(name: string, next: VariableSelection): void => {
setVariableValue(dashboardId, name, next);
void setUrlValues((prev) => ({
...(prev ?? {}),
[name]: next.allSelected ? ALL_SELECTED : next.value,
}));
},
[dashboardId, setVariableValue, setUrlValues],
);
return { variables, dependencyData, selection, setSelection };
}

View File

@@ -1,199 +0,0 @@
import { textContainsVariableReference } from 'lib/dashboardVariables/variableReference';
import type { VariableFormModel } from '../DashboardSettings/Variables/variableModel';
/**
* Inter-variable dependency graph for runtime selection. A QUERY variable
* "depends on" another variable when its query text references that variable
* (`{{.name}}`, `{{name}}`, `$name`, `[[name]]`). When a variable's value
* changes, its dependent QUERY variables must refetch. Ported from the V1
* dashboard-variables runtime; operates on the V2 flat variable model.
*/
export type VariableGraph = Record<string, string[]>;
export interface VariableDependencyData {
/** Topological order of variables (parents before children). */
order: string[];
/** Direct children (dependents) of each variable. */
graph: VariableGraph;
/** Direct parents of each variable. */
parentGraph: VariableGraph;
/** All transitive descendants of each variable (precomputed). */
transitiveDescendants: VariableGraph;
hasCycle: boolean;
cycleNodes?: string[];
}
/** Names of QUERY variables whose query references `variableName`. */
function getDependents(
variableName: string,
variables: VariableFormModel[],
): string[] {
return variables
.filter(
(v) =>
v.type === 'QUERY' &&
!!v.name &&
textContainsVariableReference(v.queryValue || '', variableName),
)
.map((v) => v.name);
}
/** variable name → its direct dependents (children). */
export function buildDependencies(
variables: VariableFormModel[],
): VariableGraph {
const graph: VariableGraph = {};
variables.forEach((v) => {
if (v.name) {
graph[v.name] = getDependents(v.name, variables);
}
});
return graph;
}
/** Invert a child graph into a parent graph. */
export function buildParentGraph(graph: VariableGraph): VariableGraph {
const parents: VariableGraph = {};
Object.keys(graph).forEach((node) => {
parents[node] = parents[node] ?? [];
});
Object.entries(graph).forEach(([node, children]) => {
children.forEach((child) => {
parents[child] = parents[child] ?? [];
parents[child].push(node);
});
});
return parents;
}
function collectCyclePath(
graph: VariableGraph,
start: string,
end: string,
): string[] {
const path: string[] = [];
let current = start;
const findParent = (node: string): string | undefined =>
Object.keys(graph).find((key) => graph[key]?.includes(node));
while (current !== end) {
const parent = findParent(current);
if (!parent) {
break;
}
path.push(parent);
current = parent;
}
return [start, ...path];
}
function detectCycle(
graph: VariableGraph,
node: string,
visited: Set<string>,
recStack: Set<string>,
): string[] | null {
if (!visited.has(node)) {
visited.add(node);
recStack.add(node);
let cycleNodes: string[] | null = null;
(graph[node] || []).some((neighbor) => {
if (!visited.has(neighbor)) {
const found = detectCycle(graph, neighbor, visited, recStack);
if (found) {
cycleNodes = found;
return true;
}
} else if (recStack.has(neighbor)) {
cycleNodes = collectCyclePath(graph, node, neighbor);
return true;
}
return false;
});
if (cycleNodes) {
return cycleNodes;
}
}
recStack.delete(node);
return null;
}
/** Build the full dependency data (topo order, parents, transitive descendants, cycle info). */
export function buildDependencyData(
dependencies: VariableGraph,
): VariableDependencyData {
const inDegree: Record<string, number> = {};
const adjList: VariableGraph = {};
Object.keys(dependencies).forEach((node) => {
inDegree[node] = inDegree[node] ?? 0;
adjList[node] = adjList[node] ?? [];
(dependencies[node] || []).forEach((child) => {
inDegree[child] = inDegree[child] ?? 0;
inDegree[child] += 1;
adjList[node].push(child);
});
});
const visited = new Set<string>();
const recStack = new Set<string>();
let cycleNodes: string[] | undefined;
Object.keys(dependencies).some((node) => {
if (!visited.has(node)) {
const found = detectCycle(dependencies, node, visited, recStack);
if (found) {
cycleNodes = found;
return true;
}
}
return false;
});
// Topological sort (Kahn's algorithm).
const queue = Object.keys(inDegree).filter((n) => inDegree[n] === 0);
const order: string[] = [];
while (queue.length > 0) {
const current = queue.shift();
if (current === undefined) {
break;
}
order.push(current);
(adjList[current] || []).forEach((neighbor) => {
inDegree[neighbor] -= 1;
if (inDegree[neighbor] === 0) {
queue.push(neighbor);
}
});
}
const hasCycle = order.length !== Object.keys(dependencies).length;
// Transitive descendants: walk topo order in reverse.
const transitiveDescendants: VariableGraph = {};
for (let i = order.length - 1; i >= 0; i--) {
const node = order[i];
const desc = new Set<string>();
(adjList[node] || []).forEach((child) => {
desc.add(child);
(transitiveDescendants[child] || []).forEach((d) => desc.add(d));
});
transitiveDescendants[node] = Array.from(desc);
}
return {
order,
graph: adjList,
parentGraph: buildParentGraph(adjList),
transitiveDescendants,
hasCycle,
cycleNodes,
};
}
/** Compute the full dependency data straight from the variable list. */
export function computeVariableDependencies(
variables: VariableFormModel[],
): VariableDependencyData {
return buildDependencyData(buildDependencies(variables));
}

View File

@@ -1,55 +0,0 @@
import type { StateCreator } from 'zustand';
import type {
VariableSelection,
VariableSelectionMap,
} from '../../VariablesBar/selectionTypes';
import type { DashboardStore } from '../useDashboardStore';
/**
* Runtime variable selection — the values the user picks in the variable bar.
* Keyed by dashboardId → variable name. Frontend-only and persisted to
* localStorage (mirrored to the URL by the bar for shareable links); it is
* deliberately NOT part of the dashboard spec, so selecting a value never
* patches the dashboard.
*/
export interface VariableSelectionSlice {
variableValues: Record<string, VariableSelectionMap>;
setVariableValue: (
dashboardId: string,
name: string,
selection: VariableSelection,
) => void;
/** Bulk set (used to seed from URL/localStorage/defaults on load). */
setVariableValues: (dashboardId: string, values: VariableSelectionMap) => void;
}
export const createVariableSelectionSlice: StateCreator<
DashboardStore,
[['zustand/persist', unknown]],
[],
VariableSelectionSlice
> = (set, get) => ({
variableValues: {},
setVariableValue: (dashboardId, name, selection): void => {
const { variableValues } = get();
set({
variableValues: {
...variableValues,
[dashboardId]: { ...variableValues[dashboardId], [name]: selection },
},
});
},
setVariableValues: (dashboardId, values): void => {
const { variableValues } = get();
set({
variableValues: { ...variableValues, [dashboardId]: values },
});
},
});
/** Selector: the selection map for a dashboard (empty if none). */
export const selectVariableValues =
(dashboardId: string) =>
(state: DashboardStore): VariableSelectionMap =>
state.variableValues[dashboardId] ?? {};

View File

@@ -9,36 +9,25 @@ import {
createCollapseSlice,
type CollapseSlice,
} from './slices/collapseSlice';
import {
createVariableSelectionSlice,
type VariableSelectionSlice,
} from './slices/variableSelectionSlice';
export type DashboardStore = EditContextSlice &
CollapseSlice &
VariableSelectionSlice;
export type DashboardStore = EditContextSlice & CollapseSlice;
/**
* V2 dashboard session store. Holds cross-cutting client state only — never the
* dashboard spec (that stays in react-query via useGetDashboardV2). Slices:
* dashboard spec (that stays in react-query via useGetDashboardV2). Two slices:
* - edit-context: dashboardId / isEditable / refetch (set once, not persisted).
* - collapse: per-section open state (frontend-only, persisted to localStorage).
* - variable-selection: runtime variable values (frontend-only, persisted).
*/
export const useDashboardStore = create<DashboardStore>()(
persist(
(...a) => ({
...createEditContextSlice(...a),
...createCollapseSlice(...a),
...createVariableSelectionSlice(...a),
}),
{
name: '@signoz/dashboard-v2',
// Persist UI-only state (context incl. the refetch fn is transient).
partialize: (state) => ({
collapsed: state.collapsed,
variableValues: state.variableValues,
}),
// Persist only the collapse map — context (incl. the refetch fn) is transient.
partialize: (state) => ({ collapsed: state.collapsed }),
},
),
);

View File

@@ -16,7 +16,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListHosts",
Tags: []string{"inframonitoring"},
Summary: "List Hosts for Infra Monitoring",
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableHosts),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Hosts),
@@ -35,7 +35,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListPods",
Tags: []string{"inframonitoring"},
Summary: "List Pods for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostablePods),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Pods),
@@ -54,7 +54,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListNodes",
Tags: []string{"inframonitoring"},
Summary: "List Nodes for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableNodes),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Nodes),
@@ -73,7 +73,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListNamespaces",
Tags: []string{"inframonitoring"},
Summary: "List Namespaces for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableNamespaces),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Namespaces),
@@ -92,7 +92,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListClusters",
Tags: []string{"inframonitoring"},
Summary: "List Clusters for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableClusters),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Clusters),
@@ -111,7 +111,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListVolumes",
Tags: []string{"inframonitoring"},
Summary: "List Volumes for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableVolumes),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Volumes),
@@ -130,7 +130,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListDeployments",
Tags: []string{"inframonitoring"},
Summary: "List Deployments for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableDeployments),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Deployments),
@@ -149,7 +149,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListStatefulSets",
Tags: []string{"inframonitoring"},
Summary: "List StatefulSets for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableStatefulSets),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.StatefulSets),
@@ -168,7 +168,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListJobs",
Tags: []string{"inframonitoring"},
Summary: "List Jobs for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableJobs),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Jobs),
@@ -187,7 +187,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
ID: "ListDaemonSets",
Tags: []string{"inframonitoring"},
Summary: "List DaemonSets for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.",
Description: "Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableDaemonSets),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.DaemonSets),

View File

@@ -413,64 +413,27 @@ func (m *module) buildFilterClause(ctx context.Context, filter *qbtypes.Filter,
// NOTE: this method is not specific to infra monitoring — it queries attributes_metadata generically.
// Consider moving to telemetryMetaStore when a second use case emerges.
//
// getMetricsExistenceAndEarliestTime checks which of the given metric names have been
// reported. It returns a list of missing metrics (those not found or with zero count)
// and the earliest first-reported timestamp across all present metrics.
// When all metrics are missing, minFirstReportedUnixMilli is 0.
// TODO(nikhilmantri0902, srikanthccv): This method was designed this way because querier errors if any of the metrics
// in the querier list was never sent, the QueryRange call throws not found error. Modify this method, if QueryRange
// behaviour changes towards this.
func (m *module) getMetricsExistenceAndEarliestTime(ctx context.Context, metricNames []string) ([]string, uint64, error) {
// getEarliestMetricTime returns the earliest first_reported_unix_milli across the
// given metric names. It is used solely for the end-time-before-retention check.
// When none of the metrics have ever been reported, it returns 0.
func (m *module) getEarliestMetricTime(ctx context.Context, metricNames []string) (uint64, error) {
if len(metricNames) == 0 {
return nil, 0, nil
return 0, nil
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select("metric_name", "count(*) AS cnt", "min(first_reported_unix_milli) AS min_first_reported")
sb.Select("min(first_reported_unix_milli) AS min_first_reported")
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
sb.Where(sb.In("metric_name", sqlbuilder.List(metricNames)))
sb.GroupBy("metric_name")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, 0, err
}
defer rows.Close()
type metricInfo struct {
count uint64
minFirstReported uint64
}
found := make(map[string]metricInfo, len(metricNames))
for rows.Next() {
var name string
var cnt, minFR uint64
if err := rows.Scan(&name, &cnt, &minFR); err != nil {
return nil, 0, err
}
found[name] = metricInfo{count: cnt, minFirstReported: minFR}
}
if err := rows.Err(); err != nil {
return nil, 0, err
var minFirstReported uint64
if err := m.telemetryStore.ClickhouseDB().QueryRow(ctx, query, args...).Scan(&minFirstReported); err != nil {
return 0, err
}
var missingMetrics []string
var globalMinFirstReported uint64
for _, name := range metricNames {
info, ok := found[name]
if !ok || info.count == 0 {
missingMetrics = append(missingMetrics, name)
continue
}
if globalMinFirstReported == 0 || info.minFirstReported < globalMinFirstReported {
globalMinFirstReported = info.minFirstReported
}
}
return missingMetrics, globalMinFirstReported, nil
return minFirstReported, nil
}
// getMetadata fetches the latest values of additionalCols for each unique combination of groupBy keys,

View File

@@ -78,26 +78,18 @@ func (m *module) ListHosts(ctx context.Context, orgID valuer.UUID, req *inframon
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
// 1. Check which required metrics exist and get earliest retention time.
// If any required metric is missing, return early with the list of missing metrics.
// 2. If metrics exist but req.End is before the earliest reported time, return early with endTimeBeforeRetention=true.
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, hostsTableMetricNamesList)
// If req.End is before the earliest reported time for these metrics, return early
// with endTimeBeforeRetention=true.
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, hostsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.HostRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.HostRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
// TOD(nikhilmantri0902): replace this separate ClickHouse query with a sub-query inside the main query builder query
// once QB supports sub-queries.
@@ -191,23 +183,16 @@ func (m *module) ListPods(ctx context.Context, orgID valuer.UUID, req *inframoni
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, podsTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, podsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.PodRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.PodRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getPodsTableMetadata(ctx, req)
if err != nil {
@@ -276,23 +261,16 @@ func (m *module) ListNodes(ctx context.Context, orgID valuer.UUID, req *inframon
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, nodesTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, nodesTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.NodeRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.NodeRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getNodesTableMetadata(ctx, req)
if err != nil {
@@ -366,23 +344,16 @@ func (m *module) ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inf
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, namespacesTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, namespacesTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.NamespaceRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.NamespaceRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getNamespacesTableMetadata(ctx, req)
if err != nil {
@@ -450,23 +421,16 @@ func (m *module) ListClusters(ctx context.Context, orgID valuer.UUID, req *infra
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, clustersTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, clustersTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.ClusterRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.ClusterRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getClustersTableMetadata(ctx, req)
if err != nil {
@@ -547,23 +511,16 @@ func (m *module) ListVolumes(ctx context.Context, orgID valuer.UUID, req *infram
}
req.Filter.Expression = mergeFilterExpressions(volumesBaseFilterExpr, req.Filter.Expression)
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, volumesTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, volumesTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.VolumeRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.VolumeRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getVolumesTableMetadata(ctx, req)
if err != nil {
@@ -632,23 +589,16 @@ func (m *module) ListDeployments(ctx context.Context, orgID valuer.UUID, req *in
}
req.Filter.Expression = mergeFilterExpressions(deploymentsBaseFilterExpr, req.Filter.Expression)
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, deploymentsTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, deploymentsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.DeploymentRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.DeploymentRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getDeploymentsTableMetadata(ctx, req)
if err != nil {
@@ -722,23 +672,16 @@ func (m *module) ListStatefulSets(ctx context.Context, orgID valuer.UUID, req *i
}
req.Filter.Expression = mergeFilterExpressions(statefulSetsBaseFilterExpr, req.Filter.Expression)
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, statefulSetsTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, statefulSetsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.StatefulSetRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.StatefulSetRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getStatefulSetsTableMetadata(ctx, req)
if err != nil {
@@ -814,23 +757,16 @@ func (m *module) ListJobs(ctx context.Context, orgID valuer.UUID, req *inframoni
}
req.Filter.Expression = mergeFilterExpressions(jobsBaseFilterExpr, req.Filter.Expression)
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, jobsTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, jobsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.JobRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.JobRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getJobsTableMetadata(ctx, req)
if err != nil {
@@ -906,23 +842,16 @@ func (m *module) ListDaemonSets(ctx context.Context, orgID valuer.UUID, req *inf
}
req.Filter.Expression = mergeFilterExpressions(daemonSetsBaseFilterExpr, req.Filter.Expression)
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, daemonSetsTableMetricNamesList)
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, daemonSetsTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.DaemonSetRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.DaemonSetRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getDaemonSetsTableMetadata(ctx, req)
if err != nil {

View File

@@ -119,11 +119,7 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
queries := make(map[string]qbtypes.Query)
steps := make(map[string]qbtypes.Step)
// Resolve metric metadata once per request: patches each metric-aggregation
// query's spec in place, returns the queries whose every aggregation was
// missing (used for preseeded empty results), and any dormant-metric
// warning string. NotFound errors for never-seen metrics are propagated.
missingMetricQueries, dormantMetricsWarningMsg, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
missingMetricQueries, metricWarnings, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
if err != nil {
return nil, err
}
@@ -240,13 +236,15 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
}
}
}
if dormantMetricsWarningMsg != "" {
if len(metricWarnings) > 0 {
if qbResp.Warning == nil {
qbResp.Warning = &qbtypes.QueryWarnData{}
}
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
Message: dormantMetricsWarningMsg,
})
for _, w := range metricWarnings {
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
Message: w,
})
}
}
}
return qbResp, qbErr
@@ -302,12 +300,11 @@ func (q *querier) populateQBEvent(event *qbtypes.QBEvent, queries []qbtypes.Quer
// - missingMetricQueries: names of queries whose every aggregation was
// missing. Used downstream to preseed empty result placeholders so the
// response still has an entry per requested query name.
// - dormantWarning: a human-readable warning describing metrics that exist in
// the store but produced no data within the query window. Empty when no
// such metrics are present.
// - err: NotFound when one or more referenced metrics have never been seen,
// or Internal when a metadata fetch fails.
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, dormantWarning string, err error) {
// - metricWarnings: human-readable warnings for metrics that could not be
// resolved: never-seen metrics and dormant metrics (seen but no data in
// the query window).
// - err: Internal when a metadata fetch fails.
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, metricWarnings []string, err error) {
metricNames := make([]string, 0)
for idx := range queries {
if queries[idx].Type != qbtypes.QueryTypeBuilder {
@@ -325,13 +322,13 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
if len(metricNames) == 0 {
return nil, "", nil
return nil, nil, nil
}
metricTemporality, metricTypes, err := q.metadataStore.FetchTemporalityAndTypeMulti(ctx, start, end, metricNames...)
if err != nil {
q.logger.WarnContext(ctx, "failed to fetch metric temporality", errors.Attr(err), slog.Any("metrics", metricNames))
return nil, "", errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
return nil, nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
}
q.logger.DebugContext(ctx, "fetched metric temporalities and types", slog.Any("metric_temporality", metricTemporality), slog.Any("metric_types", metricTypes))
@@ -363,7 +360,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
// Type is resolved now; validate aggregation compatibility against it.
if err := spec.Aggregations[i].ValidateForType(); err != nil {
return nil, "", err
return nil, nil, err
}
presentAggregations = append(presentAggregations, spec.Aggregations[i])
}
@@ -376,7 +373,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
if len(missingMetrics) == 0 {
return missingMetricQueries, "", nil
return missingMetricQueries, nil, nil
}
isInternalMetric := func(n string) bool { return strings.HasPrefix(n, "signoz.") || strings.HasPrefix(n, "signoz_") }
@@ -387,29 +384,33 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
}
if len(externalMissingMetrics) == 0 {
// this means all missing metrics are internal, and since internal metrics
// aren't user-controlled, skip errors/warnings for them since users can't act on them
return missingMetricQueries, "", nil
return missingMetricQueries, nil, nil
}
// Classify each missing metric: never-seen → NotFound error; seen-but-no-
// data-in-window dormant warning.
// Classify each missing metric: never-seen -> warning with empty result;
// seen-but-no-data-in-window -> dormant warning.
lastSeenInfo, _ := q.metadataStore.FetchLastSeenInfoMulti(ctx, externalMissingMetrics...)
nonExistentMetrics := []string{}
var nonExistentMetrics []string
var dormantMetrics []string
for _, name := range externalMissingMetrics {
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
dormantMetrics = append(dormantMetrics, name)
continue
}
nonExistentMetrics = append(nonExistentMetrics, name)
}
var warnings []string
// Never-seen metrics: the query already gets a preseeded empty result
// via the aggregation-dropping path above; we just attach a warning.
if len(nonExistentMetrics) == 1 {
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "could not find the metric %s", nonExistentMetrics[0])
}
if len(nonExistentMetrics) > 1 {
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "the following metrics were not found: %s", strings.Join(nonExistentMetrics, ", "))
warnings = append(warnings, fmt.Sprintf("metric %s has never been received. Check the metric name and instrumentation", nonExistentMetrics[0]))
} else if len(nonExistentMetrics) > 1 {
warnings = append(warnings, fmt.Sprintf("the following metrics have never been received. Check the metric names and instrumentation: %s", strings.Join(nonExistentMetrics, ", ")))
}
// All missing metrics are dormant — assemble the warning string.
// Dormant metrics: seen before but no data in the query window.
lastSeenStr := func(name string) string {
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
ago := humanize.RelTime(time.UnixMilli(ts), time.Now(), "ago", "from now")
@@ -417,16 +418,16 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
}
return name
}
if len(externalMissingMetrics) == 1 {
dormantWarning = fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(missingMetrics[0]))
} else {
parts := make([]string, len(externalMissingMetrics))
for i, m := range externalMissingMetrics {
if len(dormantMetrics) == 1 {
warnings = append(warnings, fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(dormantMetrics[0])))
} else if len(dormantMetrics) > 1 {
parts := make([]string, len(dormantMetrics))
for i, m := range dormantMetrics {
parts[i] = lastSeenStr(m)
}
dormantWarning = fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", "))
warnings = append(warnings, fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", ")))
}
return missingMetricQueries, dormantWarning, nil
return missingMetricQueries, warnings, nil
}
func (q *querier) QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, client *qbtypes.RawStream) {

View File

@@ -37,7 +37,7 @@ func (m *mockMetricStmtBuilder) Build(_ context.Context, _, _ uint64, _ qbtypes.
func TestQueryRange_MetricTypeMissing(t *testing.T) {
// When a metric has UnspecifiedType and is not found in the metadata store,
// the querier should return a not-found error, even if the request provides a temporality
// the querier should return an empty result with a warning instead of an error.
providerSettings := instrumentationtest.New().ToProviderSettings()
metadataStore := telemetrytypestest.NewMockMetadataStore()
@@ -80,9 +80,14 @@ func TestQueryRange_MetricTypeMissing(t *testing.T) {
},
}
_, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
require.Error(t, err)
assert.Contains(t, err.Error(), "could not find the metric unknown_metric")
resp, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
require.NoError(t, err)
require.NotNil(t, resp)
require.NotNil(t, resp.Warning)
require.Len(t, resp.Warning.Warnings, 1)
assert.Contains(t, resp.Warning.Warnings[0].Message, "unknown_metric")
assert.Contains(t, resp.Warning.Warnings[0].Message, "has never been received")
}
func TestQueryRange_MetricTypeFromStore(t *testing.T) {

View File

@@ -101,9 +101,29 @@ func (b *MetricQueryStatementBuilder) Build(
return nil, err
}
var pairFallbackWarnings []string
for _, sel := range keySelectors {
if _, ok := keys[sel.Name]; !ok {
keys[sel.Name] = []*telemetrytypes.TelemetryFieldKey{{
Name: sel.Name,
FieldContext: telemetrytypes.FieldContextAttribute,
FieldDataType: telemetrytypes.FieldDataTypeString,
Signal: telemetrytypes.SignalMetrics,
}}
pairFallbackWarnings = append(pairFallbackWarnings,
fmt.Sprintf("key `%s` not found on metric %s", sel.Name, query.Aggregations[0].MetricName),
)
}
}
start, end = querybuilder.AdjustedMetricTimeRange(start, end, uint64(query.StepInterval.Seconds()), query)
return b.buildPipelineStatement(ctx, start, end, query, keys, variables)
stmt, err := b.buildPipelineStatement(ctx, start, end, query, keys, variables)
if err != nil {
return nil, err
}
stmt.Warnings = append(stmt.Warnings, pairFallbackWarnings...)
return stmt, nil
}
func (b *MetricQueryStatementBuilder) buildPipelineStatement(

View File

@@ -217,6 +217,39 @@ func TestStatementBuilder(t *testing.T) {
},
expectedErr: nil,
},
{
name: "test_missing_key_falls_back_to_labels",
requestType: qbtypes.RequestTypeTimeSeries,
query: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Signal: telemetrytypes.SignalMetrics,
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "signoz_calls_total",
Type: metrictypes.SumType,
Temporality: metrictypes.Cumulative,
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
},
},
Filter: &qbtypes.Filter{
Expression: "k8s.statefulset.name = 'my-statefulset'",
},
GroupBy: []qbtypes.GroupByKey{
{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "k8s.statefulset.name",
},
},
},
},
expected: qbtypes.Statement{
Query: "WITH __temporal_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(30)) AS ts, `k8s.statefulset.name`, max(value) AS per_series_value FROM signoz_metrics.distributed_samples_v4 AS points INNER JOIN (SELECT fingerprint, JSONExtractString(labels, 'k8s.statefulset.name') AS `k8s.statefulset.name` FROM signoz_metrics.time_series_v4_6hrs WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? AND JSONExtractString(labels, 'k8s.statefulset.name') = ? GROUP BY fingerprint, `k8s.statefulset.name`) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts, `k8s.statefulset.name` ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts, `k8s.statefulset.name`) SELECT * FROM __spatial_aggregation_cte ORDER BY `k8s.statefulset.name`, ts",
Args: []any{"signoz_calls_total", uint64(1747936800000), uint64(1747983420000), "cumulative", false, "my-statefulset", "signoz_calls_total", uint64(1747947360000), uint64(1747983420000), 0},
Warnings: []string{"key `k8s.statefulset.name` not found on metric signoz_calls_total"},
},
expectedErr: nil,
},
}
fm := NewFieldMapper()

View File

@@ -12,7 +12,6 @@ type Clusters struct {
Type ResponseType `json:"type" required:"true"`
Records []ClusterRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type DaemonSets struct {
Type ResponseType `json:"type" required:"true"`
Records []DaemonSetRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Deployments struct {
Type ResponseType `json:"type" required:"true"`
Records []DeploymentRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Hosts struct {
Type ResponseType `json:"type" required:"true"`
Records []HostRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}
@@ -30,10 +29,6 @@ type HostRecord struct {
Meta map[string]string `json:"meta" required:"true"`
}
type RequiredMetricsCheck struct {
MissingMetrics []string `json:"missingMetrics" required:"true"`
}
type PostableHosts struct {
Start int64 `json:"start" required:"true"`
End int64 `json:"end" required:"true"`

View File

@@ -12,7 +12,6 @@ type Jobs struct {
Type ResponseType `json:"type" required:"true"`
Records []JobRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Namespaces struct {
Type ResponseType `json:"type" required:"true"`
Records []NamespaceRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Nodes struct {
Type ResponseType `json:"type" required:"true"`
Records []NodeRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Pods struct {
Type ResponseType `json:"type" required:"true"`
Records []PodRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type StatefulSets struct {
Type ResponseType `json:"type" required:"true"`
Records []StatefulSetRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -12,7 +12,6 @@ type Volumes struct {
Type ResponseType `json:"type" required:"true"`
Records []VolumeRecord `json:"records" required:"true" nullable:"false"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}

View File

@@ -0,0 +1,36 @@
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 100, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 200, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 300, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 100, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 150, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 400, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 600, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 800, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 10, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 20, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 30, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 2000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 2100000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 2200000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 6000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 5900000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 5800000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 1.5, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 1.55, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 1.6, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 51000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 52000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 49000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 48000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}

View File

@@ -11,7 +11,7 @@ from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.fs import get_testdata_file_path
from fixtures.metrics import Metrics
from fixtures.querier import compare_values
from fixtures.querier import compare_values, get_all_warnings
ENDPOINT = "/api/v2/infra_monitoring/hosts"
@@ -56,7 +56,8 @@ def test_hosts_accuracy(
# Shape/contract.
assert data["total"] == len(expected["records"])
assert len(data["records"]) == len(expected["records"])
assert data["requiredMetricsCheck"]["missingMetrics"] == []
# Full data present -> no warnings surfaced.
assert get_all_warnings(response.json()) == []
assert data["endTimeBeforeRetention"] is False
assert {r["hostName"] for r in data["records"]} == set(exp_by_host.keys())
@@ -79,45 +80,109 @@ def test_hosts_accuracy(
assert compare_values(record[field], exp[field], 1e-9), f"{record['hostName']}.{field}: got {record[field]}, expected {exp[field]}"
def test_hosts_missing_metrics(
@pytest.mark.parametrize(
"case",
[
# Scenario 1: a required host metric was never ingested. Post-#11754 the
# querier drops it instead of hard-erroring, so the endpoint returns 200
# with the hosts that DO have data; the never-seen metric's column is the
# -1 sentinel and a "has never been received" warning is surfaced.
pytest.param(
{
"dataset": "hosts_missing_metrics.jsonl", # seeds only system.cpu.time
"body": {"filter": {"expression": "host.name = 'miss-h1'"}},
# singular form is "has", plural (>1 missing) is "have" -> match the common stem
"warn_substrings": ["never been received"],
"warn_names": [
"system.memory.usage",
"system.cpu.load_average.15m",
"system.filesystem.usage",
],
# cpu/wait derive from the present system.cpu.time; the rest come
# from never-seen metrics -> -1 sentinel.
"data_fields": ["cpu", "wait"],
"no_data_fields": ["memory", "load15", "diskUsage"],
},
id="metric_never_seen",
),
# Scenario 2: groupBy a key that IS in metadata (seen on some metrics) but
# was never seen together with the host metrics. deployment.environment is
# seeded on system.memory.usage + system.filesystem.usage only, so the key
# resolves (the page-groups IN-filter parses -> no 400) but the cpu.time /
# load_average metrics miss the (metric, key) pair and the statement builder
# falls back to raw labels, surfacing "key `...` not found on metric ...".
# Still 200, no hard error.
pytest.param(
{
"dataset": "hosts_metric_key_pair.jsonl",
"body": {
"filter": {"expression": "host.name = 'kp-h1'"},
"groupBy": [
{
"name": "deployment.environment",
"fieldDataType": "string",
"fieldContext": "resource",
}
],
},
"warn_substrings": ["key `deployment.environment` not found on metric"],
"warn_names": [],
"data_fields": [],
"no_data_fields": [],
},
id="metric_key_pair_not_seen",
),
],
)
def test_hosts_warnings(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics,
case: dict,
) -> None:
"""Seed only system.cpu.time; assert other 3 required metrics flagged missing."""
"""Data-availability gaps surface as non-blocking warnings (200 + data),
not hard errors. Covers never-seen metrics (scenario 1) and never-seen
(metric, key) pairs via groupBy (scenario 2). Field-level expectations are
provisional pending a real run."""
now = datetime.now(tz=UTC).replace(microsecond=0)
insert_metrics(
Metrics.load_from_file(
get_testdata_file_path("inframonitoring/hosts_missing_metrics.jsonl"),
get_testdata_file_path(f"inframonitoring/{case['dataset']}"),
base_time=now - timedelta(minutes=4),
)
)
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
body: dict = {
"start": int((now - timedelta(minutes=5)).timestamp() * 1000),
"end": int(now.timestamp() * 1000),
"limit": 50,
}
body.update(case["body"])
response = requests.post(
signoz.self.host_configs["8080"].get(ENDPOINT),
headers={"authorization": f"Bearer {token}"},
json={
"start": int((now - timedelta(minutes=5)).timestamp() * 1000),
"end": int(now.timestamp() * 1000),
"limit": 50,
},
json=body,
timeout=5,
)
assert response.status_code == HTTPStatus.OK
assert response.status_code == HTTPStatus.OK, response.text
data = response.json()["data"]
warnings = get_all_warnings(response.json())
assert set(data["requiredMetricsCheck"]["missingMetrics"]) == {
"system.memory.usage",
"system.cpu.load_average.15m",
"system.filesystem.usage",
}
# Endpoint short-circuits when any required metric is missing:
# records is empty and total=0 regardless of which hosts have partial data.
# See pkg/modules/inframonitoring/implinframonitoring/module.go:84-89.
assert data["records"] == []
assert data["total"] == 0
for substr in case["warn_substrings"]:
assert any(substr in w["message"] for w in warnings), f"{substr!r} not surfaced: {warnings!r}"
for name in case["warn_names"]:
assert any(name in w["message"] for w in warnings), f"{name!r} not surfaced: {warnings!r}"
assert len(data["records"]) >= 1, f"expected at least one record: {data!r}"
if case["data_fields"] or case["no_data_fields"]:
record = data["records"][0]
for field in case["data_fields"]:
assert record[field] != -1, f"expected {field} populated, got {record[field]}"
for field in case["no_data_fields"]:
assert record[field] == -1, f"expected {field} == -1 sentinel, got {record[field]}"
@pytest.mark.parametrize(

View File

@@ -614,7 +614,7 @@ def test_histogram_p90_returns_warning_outside_data_window(
assert warnings[0]["message"].startswith(f"no data found for the metric {metric_name}")
def test_non_existent_metrics_returns_404(
def test_non_existent_metrics_returns_warning(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token: Callable[[str, str], str],
@@ -635,9 +635,11 @@ def test_non_existent_metrics_returns_404(
start_2h = int((now - timedelta(hours=2)).timestamp() * 1000)
response = make_query_request(signoz, token, start_2h, end_ms, [query])
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.status_code == HTTPStatus.OK
assert get_error_message(response.json()) == "could not find the metric whatevergoennnsgoeshere"
data = response.json()
warnings = get_all_warnings(data)
assert any("whatevergoennnsgoeshere" in w["message"] and "has never been received" in w["message"] for w in warnings), f"expected never-seen metric warning, got: {warnings}"
def test_non_existent_internal_metrics_returns_no_warning(