mirror of
https://github.com/SigNoz/signoz.git
synced 2026-06-18 22:40:34 +01:00
Compare commits
3 Commits
feat/dashb
...
infraM/rem
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
885e29cd7b | ||
|
|
f182ee0c49 | ||
|
|
2cf7ef93ea |
@@ -3938,8 +3938,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesClusterRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -3950,7 +3948,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesDaemonSetRecord:
|
||||
@@ -4007,8 +4004,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesDaemonSetRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4019,7 +4014,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesDeploymentRecord:
|
||||
@@ -4076,8 +4070,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesDeploymentRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4088,7 +4080,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesHostFilter:
|
||||
@@ -4154,8 +4145,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesHostRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4166,7 +4155,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesJobRecord:
|
||||
@@ -4229,8 +4217,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesJobRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4241,7 +4227,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesNamespaceRecord:
|
||||
@@ -4276,8 +4261,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNamespaceRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4288,7 +4271,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesNodeCondition:
|
||||
@@ -4353,8 +4335,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNodeRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4365,7 +4345,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesPodCountsByPhase:
|
||||
@@ -4451,8 +4430,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4463,7 +4440,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesPostableClusters:
|
||||
@@ -4726,16 +4702,6 @@ components:
|
||||
- end
|
||||
- limit
|
||||
type: object
|
||||
InframonitoringtypesRequiredMetricsCheck:
|
||||
properties:
|
||||
missingMetrics:
|
||||
items:
|
||||
type: string
|
||||
nullable: true
|
||||
type: array
|
||||
required:
|
||||
- missingMetrics
|
||||
type: object
|
||||
InframonitoringtypesResponseType:
|
||||
enum:
|
||||
- list
|
||||
@@ -4795,8 +4761,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesStatefulSetRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4807,7 +4771,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesVolumeRecord:
|
||||
@@ -4855,8 +4818,6 @@ components:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesVolumeRecord'
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
@@ -4867,7 +4828,6 @@ components:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
LlmpricingruletypesGettablePricingRules:
|
||||
@@ -14695,10 +14655,10 @@ paths:
|
||||
for custom groupBy keys; in both modes every row aggregates nodes and pods
|
||||
in the group. Supports filtering via a filter expression, custom groupBy,
|
||||
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
|
||||
via offset/limit. Also reports missing required metrics and whether the requested
|
||||
time range falls before the data retention boundary. Numeric metric fields
|
||||
(clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable)
|
||||
return -1 as a sentinel when no data is available for that field.'
|
||||
via offset/limit. Also reports whether the requested time range falls before
|
||||
the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable,
|
||||
clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data
|
||||
is available for that field.'
|
||||
operationId: ListClusters
|
||||
requestBody:
|
||||
content:
|
||||
@@ -14771,11 +14731,11 @@ paths:
|
||||
row aggregates pods owned by daemonsets in the group. Supports filtering via
|
||||
a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit
|
||||
/ memory / memory_request / memory_limit / desired_nodes / current_nodes,
|
||||
and pagination via offset/limit. Also reports missing required metrics and
|
||||
whether the requested time range falls before the data retention boundary.
|
||||
Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit,
|
||||
daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes,
|
||||
currentNodes) return -1 as a sentinel when no data is available for that field.'
|
||||
and pagination via offset/limit. Also reports whether the requested time range
|
||||
falls before the data retention boundary. Numeric metric fields (daemonSetCPU,
|
||||
daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest,
|
||||
daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel
|
||||
when no data is available for that field.'
|
||||
operationId: ListDaemonSets
|
||||
requestBody:
|
||||
content:
|
||||
@@ -14846,11 +14806,11 @@ paths:
|
||||
group. Supports filtering via a filter expression, custom groupBy, ordering
|
||||
by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit
|
||||
/ desired_pods / available_pods, and pagination via offset/limit. Also reports
|
||||
missing required metrics and whether the requested time range falls before
|
||||
the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest,
|
||||
deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit,
|
||||
desiredPods, availablePods) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
whether the requested time range falls before the data retention boundary.
|
||||
Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit,
|
||||
deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods,
|
||||
availablePods) return -1 as a sentinel when no data is available for that
|
||||
field.'
|
||||
operationId: ListDeployments
|
||||
requestBody:
|
||||
content:
|
||||
@@ -14915,10 +14875,9 @@ paths:
|
||||
custom groupBy to aggregate hosts by any attribute, ordering by any of the
|
||||
five metrics, and pagination via offset/limit. The response type is ''list''
|
||||
for the default host.name grouping or ''grouped_list'' for custom groupBy
|
||||
keys. Also reports missing required metrics and whether the requested time
|
||||
range falls before the data retention boundary. Numeric metric fields (cpu,
|
||||
memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
keys. Also reports whether the requested time range falls before the data
|
||||
retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage)
|
||||
return -1 as a sentinel when no data is available for that field.'
|
||||
operationId: ListHosts
|
||||
requestBody:
|
||||
content:
|
||||
@@ -14992,11 +14951,11 @@ paths:
|
||||
jobs in the group. Supports filtering via a filter expression, custom groupBy,
|
||||
ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit
|
||||
/ desired_successful_pods / active_pods / failed_pods / successful_pods, and
|
||||
pagination via offset/limit. Also reports missing required metrics and whether
|
||||
the requested time range falls before the data retention boundary. Numeric
|
||||
metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest,
|
||||
jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods)
|
||||
return -1 as a sentinel when no data is available for that field.'
|
||||
pagination via offset/limit. Also reports whether the requested time range
|
||||
falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest,
|
||||
jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods,
|
||||
activePods, failedPods, successfulPods) return -1 as a sentinel when no data
|
||||
is available for that field.'
|
||||
operationId: ListJobs
|
||||
requestBody:
|
||||
content:
|
||||
@@ -15061,10 +15020,10 @@ paths:
|
||||
type is ''list'' for the default k8s.namespace.name grouping or ''grouped_list''
|
||||
for custom groupBy keys; in both modes every row aggregates pods in the group.
|
||||
Supports filtering via a filter expression, custom groupBy, ordering by cpu
|
||||
/ memory, and pagination via offset/limit. Also reports missing required metrics
|
||||
and whether the requested time range falls before the data retention boundary.
|
||||
Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel
|
||||
when no data is available for that field.'
|
||||
/ memory, and pagination via offset/limit. Also reports whether the requested
|
||||
time range falls before the data retention boundary. Numeric metric fields
|
||||
(namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
operationId: ListNamespaces
|
||||
requestBody:
|
||||
content:
|
||||
@@ -15132,10 +15091,10 @@ paths:
|
||||
for custom groupBy keys (each row aggregates nodes in the group; condition
|
||||
stays no_data). Supports filtering via a filter expression, custom groupBy,
|
||||
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
|
||||
via offset/limit. Also reports missing required metrics and whether the requested
|
||||
time range falls before the data retention boundary. Numeric metric fields
|
||||
(nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1
|
||||
as a sentinel when no data is available for that field.'
|
||||
via offset/limit. Also reports whether the requested time range falls before
|
||||
the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable,
|
||||
nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is
|
||||
available for that field.'
|
||||
operationId: ListNodes
|
||||
requestBody:
|
||||
content:
|
||||
@@ -15204,11 +15163,10 @@ paths:
|
||||
is one pod with its current phase) or ''grouped_list'' for custom groupBy
|
||||
keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase:
|
||||
{ pending, running, succeeded, failed, unknown } derived from each pod''s
|
||||
latest phase in the window). Also reports missing required metrics and whether
|
||||
the requested time range falls before the data retention boundary. Numeric
|
||||
metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest,
|
||||
podMemoryLimit, podAge) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
latest phase in the window). Also reports whether the requested time range
|
||||
falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest,
|
||||
podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1
|
||||
as a sentinel when no data is available for that field.'
|
||||
operationId: ListPods
|
||||
requestBody:
|
||||
content:
|
||||
@@ -15275,11 +15233,10 @@ paths:
|
||||
usage, inodes, inodes_free, inodes_used), and pagination via offset/limit.
|
||||
The response type is ''list'' for the default k8s.persistentvolumeclaim.name
|
||||
grouping or ''grouped_list'' for custom groupBy keys; in both modes every
|
||||
row aggregates volumes in the group. Also reports missing required metrics
|
||||
and whether the requested time range falls before the data retention boundary.
|
||||
Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes,
|
||||
volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is
|
||||
available for that field.'
|
||||
row aggregates volumes in the group. Also reports whether the requested time
|
||||
range falls before the data retention boundary. Numeric metric fields (volumeAvailable,
|
||||
volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed)
|
||||
return -1 as a sentinel when no data is available for that field.'
|
||||
operationId: ListVolumes
|
||||
requestBody:
|
||||
content:
|
||||
@@ -15350,11 +15307,10 @@ paths:
|
||||
statefulsets in the group. Supports filtering via a filter expression, custom
|
||||
groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request
|
||||
/ memory_limit / desired_pods / current_pods, and pagination via offset/limit.
|
||||
Also reports missing required metrics and whether the requested time range
|
||||
falls before the data retention boundary. Numeric metric fields (statefulSetCPU,
|
||||
statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest,
|
||||
statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel
|
||||
when no data is available for that field.'
|
||||
Also reports whether the requested time range falls before the data retention
|
||||
boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit,
|
||||
statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods,
|
||||
currentPods) return -1 as a sentinel when no data is available for that field.'
|
||||
operationId: ListStatefulSets
|
||||
requestBody:
|
||||
content:
|
||||
|
||||
@@ -39,7 +39,7 @@ import { GeneratedAPIInstance } from '../../../generatedAPIInstance';
|
||||
import type { ErrorType, BodyType } from '../../../generatedAPIInstance';
|
||||
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Clusters for Infra Monitoring
|
||||
*/
|
||||
export const listClusters = (
|
||||
@@ -122,7 +122,7 @@ export const useListClusters = <
|
||||
return useMutation(getListClustersMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List DaemonSets for Infra Monitoring
|
||||
*/
|
||||
export const listDaemonSets = (
|
||||
@@ -205,7 +205,7 @@ export const useListDaemonSets = <
|
||||
return useMutation(getListDaemonSetsMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Deployments for Infra Monitoring
|
||||
*/
|
||||
export const listDeployments = (
|
||||
@@ -288,7 +288,7 @@ export const useListDeployments = <
|
||||
return useMutation(getListDeploymentsMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Hosts for Infra Monitoring
|
||||
*/
|
||||
export const listHosts = (
|
||||
@@ -371,7 +371,7 @@ export const useListHosts = <
|
||||
return useMutation(getListHostsMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Jobs for Infra Monitoring
|
||||
*/
|
||||
export const listJobs = (
|
||||
@@ -454,7 +454,7 @@ export const useListJobs = <
|
||||
return useMutation(getListJobsMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Namespaces for Infra Monitoring
|
||||
*/
|
||||
export const listNamespaces = (
|
||||
@@ -537,7 +537,7 @@ export const useListNamespaces = <
|
||||
return useMutation(getListNamespacesMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Nodes for Infra Monitoring
|
||||
*/
|
||||
export const listNodes = (
|
||||
@@ -620,7 +620,7 @@ export const useListNodes = <
|
||||
return useMutation(getListNodesMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Pods for Infra Monitoring
|
||||
*/
|
||||
export const listPods = (
|
||||
@@ -703,7 +703,7 @@ export const useListPods = <
|
||||
return useMutation(getListPodsMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Volumes for Infra Monitoring
|
||||
*/
|
||||
export const listVolumes = (
|
||||
@@ -786,7 +786,7 @@ export const useListVolumes = <
|
||||
return useMutation(getListVolumesMutationOptions(options));
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List StatefulSets for Infra Monitoring
|
||||
*/
|
||||
export const listStatefulSets = (
|
||||
|
||||
@@ -5423,13 +5423,6 @@ export interface InframonitoringtypesClusterRecordDTO {
|
||||
podCountsByPhase: InframonitoringtypesPodCountsByPhaseDTO;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesRequiredMetricsCheckDTO {
|
||||
/**
|
||||
* @type array,null
|
||||
*/
|
||||
missingMetrics: string[] | null;
|
||||
}
|
||||
|
||||
export enum InframonitoringtypesResponseTypeDTO {
|
||||
list = 'list',
|
||||
grouped_list = 'grouped_list',
|
||||
@@ -5465,7 +5458,6 @@ export interface InframonitoringtypesClustersDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesClusterRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5543,7 +5535,6 @@ export interface InframonitoringtypesDaemonSetsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesDaemonSetRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5621,7 +5612,6 @@ export interface InframonitoringtypesDeploymentsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesDeploymentRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5707,7 +5697,6 @@ export interface InframonitoringtypesHostsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesHostRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5793,7 +5782,6 @@ export interface InframonitoringtypesJobsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesJobRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5843,7 +5831,6 @@ export interface InframonitoringtypesNamespacesDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesNamespaceRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5910,7 +5897,6 @@ export interface InframonitoringtypesNodesDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesNodeRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -5994,7 +5980,6 @@ export interface InframonitoringtypesPodsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesPodRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -6342,7 +6327,6 @@ export interface InframonitoringtypesStatefulSetsDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesStatefulSetRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
@@ -6411,7 +6395,6 @@ export interface InframonitoringtypesVolumesDTO {
|
||||
* @type array
|
||||
*/
|
||||
records: InframonitoringtypesVolumeRecordDTO[];
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
|
||||
@@ -16,7 +16,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListHosts",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Hosts for Infra Monitoring",
|
||||
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableHosts),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Hosts),
|
||||
@@ -35,7 +35,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListPods",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Pods for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostablePods),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Pods),
|
||||
@@ -54,7 +54,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListNodes",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Nodes for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableNodes),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Nodes),
|
||||
@@ -73,7 +73,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListNamespaces",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Namespaces for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value in the window). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableNamespaces),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Namespaces),
|
||||
@@ -92,7 +92,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListClusters",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Clusters for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes clusters with key aggregated metrics derived by summing per-node values within the group: CPU usage, CPU allocatable, memory working set, memory allocatable. Each row also reports per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready value) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each cluster includes metadata attributes (k8s.cluster.name). The response type is 'list' for the default k8s.cluster.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates nodes and pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (clusterCPU, clusterCPUAllocatable, clusterMemory, clusterMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableClusters),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Clusters),
|
||||
@@ -111,7 +111,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListVolumes",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Volumes for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes persistent volume claims (PVCs) with key volume metrics: available bytes, capacity bytes, usage (capacity - available), inodes, free inodes, and used inodes. Each row also includes metadata attributes (k8s.persistentvolumeclaim.name, k8s.pod.uid, k8s.pod.name, k8s.namespace.name, k8s.node.name, k8s.statefulset.name, k8s.cluster.name). Supports filtering via a filter expression, custom groupBy to aggregate volumes by any attribute, ordering by any of the six metrics (available, capacity, usage, inodes, inodes_free, inodes_used), and pagination via offset/limit. The response type is 'list' for the default k8s.persistentvolumeclaim.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates volumes in the group. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (volumeAvailable, volumeCapacity, volumeUsage, volumeInodes, volumeInodesFree, volumeInodesUsed) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableVolumes),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Volumes),
|
||||
@@ -130,7 +130,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListDeployments",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Deployments for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes Deployments with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the deployment, plus average CPU/memory request and limit utilization (deploymentCPURequest, deploymentCPULimit, deploymentMemoryRequest, deploymentMemoryLimit). Each row also reports the latest known desiredPods (k8s.deployment.desired) and availablePods (k8s.deployment.available) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each deployment includes metadata attributes (k8s.deployment.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.deployment.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by deployments in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / available_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (deploymentCPU, deploymentCPURequest, deploymentCPULimit, deploymentMemory, deploymentMemoryRequest, deploymentMemoryLimit, desiredPods, availablePods) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableDeployments),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Deployments),
|
||||
@@ -149,7 +149,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListStatefulSets",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List StatefulSets for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes StatefulSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the statefulset, plus average CPU/memory request and limit utilization (statefulSetCPURequest, statefulSetCPULimit, statefulSetMemoryRequest, statefulSetMemoryLimit). Each row also reports the latest known desiredPods (k8s.statefulset.desired_pods) and currentPods (k8s.statefulset.current_pods) replica counts and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each statefulset includes metadata attributes (k8s.statefulset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.statefulset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by statefulsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_pods / current_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (statefulSetCPU, statefulSetCPURequest, statefulSetCPULimit, statefulSetMemory, statefulSetMemoryRequest, statefulSetMemoryLimit, desiredPods, currentPods) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableStatefulSets),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.StatefulSets),
|
||||
@@ -168,7 +168,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListJobs",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Jobs for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes Jobs with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the job, plus average CPU/memory request and limit utilization (jobCPURequest, jobCPULimit, jobMemoryRequest, jobMemoryLimit). Each row also reports the latest known job-level counters from kube-state-metrics: desiredSuccessfulPods (k8s.job.desired_successful_pods, the target completion count), activePods (k8s.job.active_pods), failedPods (k8s.job.failed_pods, cumulative across the lifetime of the job), and successfulPods (k8s.job.successful_pods, cumulative). It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value); note podCountsByPhase.failed (current pod-phase) is distinct from failedPods (cumulative job kube-state-metric). Each job includes metadata attributes (k8s.job.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.job.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by jobs in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_successful_pods / active_pods / failed_pods / successful_pods, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (jobCPU, jobCPURequest, jobCPULimit, jobMemory, jobMemoryRequest, jobMemoryLimit, desiredSuccessfulPods, activePods, failedPods, successfulPods) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableJobs),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Jobs),
|
||||
@@ -187,7 +187,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListDaemonSets",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List DaemonSets for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes DaemonSets with key aggregated pod metrics: CPU usage and memory working set summed across pods owned by the daemonset, plus average CPU/memory request and limit utilization (daemonSetCPURequest, daemonSetCPULimit, daemonSetMemoryRequest, daemonSetMemoryLimit). Each row also reports the latest known node-level counters from kube-state-metrics: desiredNodes (k8s.daemonset.desired_scheduled_nodes, the number of nodes the daemonset wants to run on) and currentNodes (k8s.daemonset.current_scheduled_nodes, the number of nodes the daemonset currently runs on) — note these are node counts, not pod counts. It also reports per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } from each pod's latest k8s.pod.phase value). Each daemonset includes metadata attributes (k8s.daemonset.name, k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.daemonset.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods owned by daemonsets in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_request / cpu_limit / memory / memory_request / memory_limit / desired_nodes / current_nodes, and pagination via offset/limit. Also reports whether the requested time range falls before the data retention boundary. Numeric metric fields (daemonSetCPU, daemonSetCPURequest, daemonSetCPULimit, daemonSetMemory, daemonSetMemoryRequest, daemonSetMemoryLimit, desiredNodes, currentNodes) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableDaemonSets),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.DaemonSets),
|
||||
|
||||
@@ -413,64 +413,27 @@ func (m *module) buildFilterClause(ctx context.Context, filter *qbtypes.Filter,
|
||||
// NOTE: this method is not specific to infra monitoring — it queries attributes_metadata generically.
|
||||
// Consider moving to telemetryMetaStore when a second use case emerges.
|
||||
//
|
||||
// getMetricsExistenceAndEarliestTime checks which of the given metric names have been
|
||||
// reported. It returns a list of missing metrics (those not found or with zero count)
|
||||
// and the earliest first-reported timestamp across all present metrics.
|
||||
// When all metrics are missing, minFirstReportedUnixMilli is 0.
|
||||
// TODO(nikhilmantri0902, srikanthccv): This method was designed this way because querier errors if any of the metrics
|
||||
// in the querier list was never sent, the QueryRange call throws not found error. Modify this method, if QueryRange
|
||||
// behaviour changes towards this.
|
||||
func (m *module) getMetricsExistenceAndEarliestTime(ctx context.Context, metricNames []string) ([]string, uint64, error) {
|
||||
// getEarliestMetricTime returns the earliest first_reported_unix_milli across the
|
||||
// given metric names. It is used solely for the end-time-before-retention check.
|
||||
// When none of the metrics have ever been reported, it returns 0.
|
||||
func (m *module) getEarliestMetricTime(ctx context.Context, metricNames []string) (uint64, error) {
|
||||
if len(metricNames) == 0 {
|
||||
return nil, 0, nil
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
sb := sqlbuilder.NewSelectBuilder()
|
||||
sb.Select("metric_name", "count(*) AS cnt", "min(first_reported_unix_milli) AS min_first_reported")
|
||||
sb.Select("min(first_reported_unix_milli) AS min_first_reported")
|
||||
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
|
||||
sb.Where(sb.In("metric_name", sqlbuilder.List(metricNames)))
|
||||
sb.GroupBy("metric_name")
|
||||
|
||||
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
|
||||
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, query, args...)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
type metricInfo struct {
|
||||
count uint64
|
||||
minFirstReported uint64
|
||||
}
|
||||
found := make(map[string]metricInfo, len(metricNames))
|
||||
|
||||
for rows.Next() {
|
||||
var name string
|
||||
var cnt, minFR uint64
|
||||
if err := rows.Scan(&name, &cnt, &minFR); err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
found[name] = metricInfo{count: cnt, minFirstReported: minFR}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, 0, err
|
||||
var minFirstReported uint64
|
||||
if err := m.telemetryStore.ClickhouseDB().QueryRow(ctx, query, args...).Scan(&minFirstReported); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
var missingMetrics []string
|
||||
var globalMinFirstReported uint64
|
||||
for _, name := range metricNames {
|
||||
info, ok := found[name]
|
||||
if !ok || info.count == 0 {
|
||||
missingMetrics = append(missingMetrics, name)
|
||||
continue
|
||||
}
|
||||
if globalMinFirstReported == 0 || info.minFirstReported < globalMinFirstReported {
|
||||
globalMinFirstReported = info.minFirstReported
|
||||
}
|
||||
}
|
||||
|
||||
return missingMetrics, globalMinFirstReported, nil
|
||||
return minFirstReported, nil
|
||||
}
|
||||
|
||||
// getMetadata fetches the latest values of additionalCols for each unique combination of groupBy keys,
|
||||
|
||||
@@ -78,26 +78,18 @@ func (m *module) ListHosts(ctx context.Context, orgID valuer.UUID, req *inframon
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
// 1. Check which required metrics exist and get earliest retention time.
|
||||
// If any required metric is missing, return early with the list of missing metrics.
|
||||
// 2. If metrics exist but req.End is before the earliest reported time, return early with endTimeBeforeRetention=true.
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, hostsTableMetricNamesList)
|
||||
// If req.End is before the earliest reported time for these metrics, return early
|
||||
// with endTimeBeforeRetention=true.
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, hostsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.HostRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.HostRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
// TOD(nikhilmantri0902): replace this separate ClickHouse query with a sub-query inside the main query builder query
|
||||
// once QB supports sub-queries.
|
||||
@@ -191,23 +183,16 @@ func (m *module) ListPods(ctx context.Context, orgID valuer.UUID, req *inframoni
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, podsTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, podsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.PodRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.PodRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getPodsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -276,23 +261,16 @@ func (m *module) ListNodes(ctx context.Context, orgID valuer.UUID, req *inframon
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, nodesTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, nodesTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.NodeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.NodeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getNodesTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -366,23 +344,16 @@ func (m *module) ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inf
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, namespacesTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, namespacesTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.NamespaceRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.NamespaceRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getNamespacesTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -450,23 +421,16 @@ func (m *module) ListClusters(ctx context.Context, orgID valuer.UUID, req *infra
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, clustersTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, clustersTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.ClusterRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.ClusterRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getClustersTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -547,23 +511,16 @@ func (m *module) ListVolumes(ctx context.Context, orgID valuer.UUID, req *infram
|
||||
}
|
||||
req.Filter.Expression = mergeFilterExpressions(volumesBaseFilterExpr, req.Filter.Expression)
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, volumesTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, volumesTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.VolumeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.VolumeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getVolumesTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -632,23 +589,16 @@ func (m *module) ListDeployments(ctx context.Context, orgID valuer.UUID, req *in
|
||||
}
|
||||
req.Filter.Expression = mergeFilterExpressions(deploymentsBaseFilterExpr, req.Filter.Expression)
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, deploymentsTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, deploymentsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.DeploymentRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.DeploymentRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getDeploymentsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -722,23 +672,16 @@ func (m *module) ListStatefulSets(ctx context.Context, orgID valuer.UUID, req *i
|
||||
}
|
||||
req.Filter.Expression = mergeFilterExpressions(statefulSetsBaseFilterExpr, req.Filter.Expression)
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, statefulSetsTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, statefulSetsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.StatefulSetRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.StatefulSetRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getStatefulSetsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -814,23 +757,16 @@ func (m *module) ListJobs(ctx context.Context, orgID valuer.UUID, req *inframoni
|
||||
}
|
||||
req.Filter.Expression = mergeFilterExpressions(jobsBaseFilterExpr, req.Filter.Expression)
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, jobsTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, jobsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.JobRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.JobRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getJobsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
@@ -906,23 +842,16 @@ func (m *module) ListDaemonSets(ctx context.Context, orgID valuer.UUID, req *inf
|
||||
}
|
||||
req.Filter.Expression = mergeFilterExpressions(daemonSetsBaseFilterExpr, req.Filter.Expression)
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, daemonSetsTableMetricNamesList)
|
||||
minFirstReportedUnixMilli, err := m.getEarliestMetricTime(ctx, daemonSetsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.DaemonSetRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.DaemonSetRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getDaemonSetsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
|
||||
@@ -119,11 +119,7 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
|
||||
queries := make(map[string]qbtypes.Query)
|
||||
steps := make(map[string]qbtypes.Step)
|
||||
|
||||
// Resolve metric metadata once per request: patches each metric-aggregation
|
||||
// query's spec in place, returns the queries whose every aggregation was
|
||||
// missing (used for preseeded empty results), and any dormant-metric
|
||||
// warning string. NotFound errors for never-seen metrics are propagated.
|
||||
missingMetricQueries, dormantMetricsWarningMsg, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
|
||||
missingMetricQueries, metricWarnings, err := q.resolveMetricMetadata(ctx, req.CompositeQuery.Queries, req.Start, req.End)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -240,13 +236,15 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
|
||||
}
|
||||
}
|
||||
}
|
||||
if dormantMetricsWarningMsg != "" {
|
||||
if len(metricWarnings) > 0 {
|
||||
if qbResp.Warning == nil {
|
||||
qbResp.Warning = &qbtypes.QueryWarnData{}
|
||||
}
|
||||
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
|
||||
Message: dormantMetricsWarningMsg,
|
||||
})
|
||||
for _, w := range metricWarnings {
|
||||
qbResp.Warning.Warnings = append(qbResp.Warning.Warnings, qbtypes.QueryWarnDataAdditional{
|
||||
Message: w,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
return qbResp, qbErr
|
||||
@@ -302,12 +300,11 @@ func (q *querier) populateQBEvent(event *qbtypes.QBEvent, queries []qbtypes.Quer
|
||||
// - missingMetricQueries: names of queries whose every aggregation was
|
||||
// missing. Used downstream to preseed empty result placeholders so the
|
||||
// response still has an entry per requested query name.
|
||||
// - dormantWarning: a human-readable warning describing metrics that exist in
|
||||
// the store but produced no data within the query window. Empty when no
|
||||
// such metrics are present.
|
||||
// - err: NotFound when one or more referenced metrics have never been seen,
|
||||
// or Internal when a metadata fetch fails.
|
||||
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, dormantWarning string, err error) {
|
||||
// - metricWarnings: human-readable warnings for metrics that could not be
|
||||
// resolved: never-seen metrics and dormant metrics (seen but no data in
|
||||
// the query window).
|
||||
// - err: Internal when a metadata fetch fails.
|
||||
func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.QueryEnvelope, start, end uint64) (missingMetricQueries []string, metricWarnings []string, err error) {
|
||||
metricNames := make([]string, 0)
|
||||
for idx := range queries {
|
||||
if queries[idx].Type != qbtypes.QueryTypeBuilder {
|
||||
@@ -325,13 +322,13 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
|
||||
}
|
||||
|
||||
if len(metricNames) == 0 {
|
||||
return nil, "", nil
|
||||
return nil, nil, nil
|
||||
}
|
||||
|
||||
metricTemporality, metricTypes, err := q.metadataStore.FetchTemporalityAndTypeMulti(ctx, start, end, metricNames...)
|
||||
if err != nil {
|
||||
q.logger.WarnContext(ctx, "failed to fetch metric temporality", errors.Attr(err), slog.Any("metrics", metricNames))
|
||||
return nil, "", errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
|
||||
return nil, nil, errors.NewInternalf(errors.CodeInternal, "failed to fetch metrics temporality")
|
||||
}
|
||||
q.logger.DebugContext(ctx, "fetched metric temporalities and types", slog.Any("metric_temporality", metricTemporality), slog.Any("metric_types", metricTypes))
|
||||
|
||||
@@ -363,7 +360,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
|
||||
}
|
||||
// Type is resolved now; validate aggregation compatibility against it.
|
||||
if err := spec.Aggregations[i].ValidateForType(); err != nil {
|
||||
return nil, "", err
|
||||
return nil, nil, err
|
||||
}
|
||||
presentAggregations = append(presentAggregations, spec.Aggregations[i])
|
||||
}
|
||||
@@ -376,7 +373,7 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
|
||||
}
|
||||
|
||||
if len(missingMetrics) == 0 {
|
||||
return missingMetricQueries, "", nil
|
||||
return missingMetricQueries, nil, nil
|
||||
}
|
||||
|
||||
isInternalMetric := func(n string) bool { return strings.HasPrefix(n, "signoz.") || strings.HasPrefix(n, "signoz_") }
|
||||
@@ -387,29 +384,33 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
|
||||
}
|
||||
}
|
||||
if len(externalMissingMetrics) == 0 {
|
||||
// this means all missing metrics are internal, and since internal metrics
|
||||
// aren't user-controlled, skip errors/warnings for them since users can't act on them
|
||||
return missingMetricQueries, "", nil
|
||||
return missingMetricQueries, nil, nil
|
||||
}
|
||||
|
||||
// Classify each missing metric: never-seen → NotFound error; seen-but-no-
|
||||
// data-in-window → dormant warning.
|
||||
// Classify each missing metric: never-seen -> warning with empty result;
|
||||
// seen-but-no-data-in-window -> dormant warning.
|
||||
lastSeenInfo, _ := q.metadataStore.FetchLastSeenInfoMulti(ctx, externalMissingMetrics...)
|
||||
nonExistentMetrics := []string{}
|
||||
var nonExistentMetrics []string
|
||||
var dormantMetrics []string
|
||||
for _, name := range externalMissingMetrics {
|
||||
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
|
||||
dormantMetrics = append(dormantMetrics, name)
|
||||
continue
|
||||
}
|
||||
nonExistentMetrics = append(nonExistentMetrics, name)
|
||||
}
|
||||
|
||||
var warnings []string
|
||||
|
||||
// Never-seen metrics: the query already gets a preseeded empty result
|
||||
// via the aggregation-dropping path above; we just attach a warning.
|
||||
if len(nonExistentMetrics) == 1 {
|
||||
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "could not find the metric %s", nonExistentMetrics[0])
|
||||
}
|
||||
if len(nonExistentMetrics) > 1 {
|
||||
return nil, "", errors.NewNotFoundf(errors.CodeNotFound, "the following metrics were not found: %s", strings.Join(nonExistentMetrics, ", "))
|
||||
warnings = append(warnings, fmt.Sprintf("metric %s has never been received. Check the metric name and instrumentation", nonExistentMetrics[0]))
|
||||
} else if len(nonExistentMetrics) > 1 {
|
||||
warnings = append(warnings, fmt.Sprintf("the following metrics have never been received. Check the metric names and instrumentation: %s", strings.Join(nonExistentMetrics, ", ")))
|
||||
}
|
||||
|
||||
// All missing metrics are dormant — assemble the warning string.
|
||||
// Dormant metrics: seen before but no data in the query window.
|
||||
lastSeenStr := func(name string) string {
|
||||
if ts, ok := lastSeenInfo[name]; ok && ts > 0 {
|
||||
ago := humanize.RelTime(time.UnixMilli(ts), time.Now(), "ago", "from now")
|
||||
@@ -417,16 +418,16 @@ func (q *querier) resolveMetricMetadata(ctx context.Context, queries []qbtypes.Q
|
||||
}
|
||||
return name
|
||||
}
|
||||
if len(externalMissingMetrics) == 1 {
|
||||
dormantWarning = fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(missingMetrics[0]))
|
||||
} else {
|
||||
parts := make([]string, len(externalMissingMetrics))
|
||||
for i, m := range externalMissingMetrics {
|
||||
if len(dormantMetrics) == 1 {
|
||||
warnings = append(warnings, fmt.Sprintf("no data found for the metric %s in the query time range", lastSeenStr(dormantMetrics[0])))
|
||||
} else if len(dormantMetrics) > 1 {
|
||||
parts := make([]string, len(dormantMetrics))
|
||||
for i, m := range dormantMetrics {
|
||||
parts[i] = lastSeenStr(m)
|
||||
}
|
||||
dormantWarning = fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", "))
|
||||
warnings = append(warnings, fmt.Sprintf("no data found for the following metrics in the query time range: %s", strings.Join(parts, ", ")))
|
||||
}
|
||||
return missingMetricQueries, dormantWarning, nil
|
||||
return missingMetricQueries, warnings, nil
|
||||
}
|
||||
|
||||
func (q *querier) QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qbtypes.QueryRangeRequest, client *qbtypes.RawStream) {
|
||||
|
||||
@@ -37,7 +37,7 @@ func (m *mockMetricStmtBuilder) Build(_ context.Context, _, _ uint64, _ qbtypes.
|
||||
|
||||
func TestQueryRange_MetricTypeMissing(t *testing.T) {
|
||||
// When a metric has UnspecifiedType and is not found in the metadata store,
|
||||
// the querier should return a not-found error, even if the request provides a temporality
|
||||
// the querier should return an empty result with a warning instead of an error.
|
||||
providerSettings := instrumentationtest.New().ToProviderSettings()
|
||||
metadataStore := telemetrytypestest.NewMockMetadataStore()
|
||||
|
||||
@@ -80,9 +80,14 @@ func TestQueryRange_MetricTypeMissing(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
_, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
|
||||
require.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "could not find the metric unknown_metric")
|
||||
resp, err := q.QueryRange(context.Background(), valuer.GenerateUUID(), req)
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, resp)
|
||||
require.NotNil(t, resp.Warning)
|
||||
|
||||
require.Len(t, resp.Warning.Warnings, 1)
|
||||
assert.Contains(t, resp.Warning.Warnings[0].Message, "unknown_metric")
|
||||
assert.Contains(t, resp.Warning.Warnings[0].Message, "has never been received")
|
||||
}
|
||||
|
||||
func TestQueryRange_MetricTypeFromStore(t *testing.T) {
|
||||
|
||||
@@ -101,9 +101,29 @@ func (b *MetricQueryStatementBuilder) Build(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var pairFallbackWarnings []string
|
||||
for _, sel := range keySelectors {
|
||||
if _, ok := keys[sel.Name]; !ok {
|
||||
keys[sel.Name] = []*telemetrytypes.TelemetryFieldKey{{
|
||||
Name: sel.Name,
|
||||
FieldContext: telemetrytypes.FieldContextAttribute,
|
||||
FieldDataType: telemetrytypes.FieldDataTypeString,
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
}}
|
||||
pairFallbackWarnings = append(pairFallbackWarnings,
|
||||
fmt.Sprintf("key `%s` not found on metric %s", sel.Name, query.Aggregations[0].MetricName),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
start, end = querybuilder.AdjustedMetricTimeRange(start, end, uint64(query.StepInterval.Seconds()), query)
|
||||
|
||||
return b.buildPipelineStatement(ctx, start, end, query, keys, variables)
|
||||
stmt, err := b.buildPipelineStatement(ctx, start, end, query, keys, variables)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
stmt.Warnings = append(stmt.Warnings, pairFallbackWarnings...)
|
||||
return stmt, nil
|
||||
}
|
||||
|
||||
func (b *MetricQueryStatementBuilder) buildPipelineStatement(
|
||||
|
||||
@@ -217,6 +217,39 @@ func TestStatementBuilder(t *testing.T) {
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "test_missing_key_falls_back_to_labels",
|
||||
requestType: qbtypes.RequestTypeTimeSeries,
|
||||
query: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "signoz_calls_total",
|
||||
Type: metrictypes.SumType,
|
||||
Temporality: metrictypes.Cumulative,
|
||||
TimeAggregation: metrictypes.TimeAggregationRate,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
},
|
||||
},
|
||||
Filter: &qbtypes.Filter{
|
||||
Expression: "k8s.statefulset.name = 'my-statefulset'",
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{
|
||||
{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "k8s.statefulset.name",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: qbtypes.Statement{
|
||||
Query: "WITH __temporal_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, multiIf(row_number() OVER rate_window = 1, nan, (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) < 0, per_series_value / (ts - lagInFrame(ts, 1) OVER rate_window), (per_series_value - lagInFrame(per_series_value, 1) OVER rate_window) / (ts - lagInFrame(ts, 1) OVER rate_window)) AS per_series_value FROM (SELECT fingerprint, toStartOfInterval(toDateTime(intDiv(unix_milli, 1000)), toIntervalSecond(30)) AS ts, `k8s.statefulset.name`, max(value) AS per_series_value FROM signoz_metrics.distributed_samples_v4 AS points INNER JOIN (SELECT fingerprint, JSONExtractString(labels, 'k8s.statefulset.name') AS `k8s.statefulset.name` FROM signoz_metrics.time_series_v4_6hrs WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli <= ? AND LOWER(temporality) LIKE LOWER(?) AND __normalized = ? AND JSONExtractString(labels, 'k8s.statefulset.name') = ? GROUP BY fingerprint, `k8s.statefulset.name`) AS filtered_time_series ON points.fingerprint = filtered_time_series.fingerprint WHERE metric_name IN (?) AND unix_milli >= ? AND unix_milli < ? GROUP BY fingerprint, ts, `k8s.statefulset.name` ORDER BY fingerprint, ts) WINDOW rate_window AS (PARTITION BY fingerprint ORDER BY fingerprint, ts)), __spatial_aggregation_cte AS (SELECT ts, `k8s.statefulset.name`, sum(per_series_value) AS value FROM __temporal_aggregation_cte WHERE isNaN(per_series_value) = ? GROUP BY ts, `k8s.statefulset.name`) SELECT * FROM __spatial_aggregation_cte ORDER BY `k8s.statefulset.name`, ts",
|
||||
Args: []any{"signoz_calls_total", uint64(1747936800000), uint64(1747983420000), "cumulative", false, "my-statefulset", "signoz_calls_total", uint64(1747947360000), uint64(1747983420000), 0},
|
||||
Warnings: []string{"key `k8s.statefulset.name` not found on metric signoz_calls_total"},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
}
|
||||
|
||||
fm := NewFieldMapper()
|
||||
|
||||
@@ -12,7 +12,6 @@ type Clusters struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []ClusterRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type DaemonSets struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []DaemonSetRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Deployments struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []DeploymentRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Hosts struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []HostRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
@@ -30,10 +29,6 @@ type HostRecord struct {
|
||||
Meta map[string]string `json:"meta" required:"true"`
|
||||
}
|
||||
|
||||
type RequiredMetricsCheck struct {
|
||||
MissingMetrics []string `json:"missingMetrics" required:"true"`
|
||||
}
|
||||
|
||||
type PostableHosts struct {
|
||||
Start int64 `json:"start" required:"true"`
|
||||
End int64 `json:"end" required:"true"`
|
||||
|
||||
@@ -12,7 +12,6 @@ type Jobs struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []JobRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Namespaces struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []NamespaceRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Nodes struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []NodeRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Pods struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []PodRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type StatefulSets struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []StatefulSetRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
@@ -12,7 +12,6 @@ type Volumes struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []VolumeRecord `json:"records" required:"true" nullable:"false"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
36
tests/integration/testdata/inframonitoring/hosts_metric_key_pair.jsonl
vendored
Normal file
36
tests/integration/testdata/inframonitoring/hosts_metric_key_pair.jsonl
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 100, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 200, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "user"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 300, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 100, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "system"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 150, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 400, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 600, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "idle"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 800, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 10, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 20, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.cpu.time", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "wait"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 30, "temporality": "Cumulative", "type_": "Sum", "is_monotonic": true}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 2000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 2100000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 2200000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 6000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 5900000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 5800000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "buffered", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.memory.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "cached", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 1500000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 1.5, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
|
||||
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 1.55, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
|
||||
{"metric_name": "system.cpu.load_average.15m", "labels": {"host.name": "kp-h1", "os.type": "linux"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 1.6, "temporality": "Unspecified", "type_": "Gauge", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 51000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "used", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 52000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 50000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 49000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "free", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 48000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:00:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:02:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
{"metric_name": "system.filesystem.usage", "labels": {"host.name": "kp-h1", "os.type": "linux", "state": "reserved", "deployment.environment": "prod"}, "timestamp": "2025-01-10T10:04:00+00:00", "value": 5000000000, "temporality": "Unspecified", "type_": "Sum", "is_monotonic": false}
|
||||
@@ -11,7 +11,7 @@ from fixtures import types
|
||||
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
|
||||
from fixtures.fs import get_testdata_file_path
|
||||
from fixtures.metrics import Metrics
|
||||
from fixtures.querier import compare_values
|
||||
from fixtures.querier import compare_values, get_all_warnings
|
||||
|
||||
ENDPOINT = "/api/v2/infra_monitoring/hosts"
|
||||
|
||||
@@ -56,7 +56,8 @@ def test_hosts_accuracy(
|
||||
# Shape/contract.
|
||||
assert data["total"] == len(expected["records"])
|
||||
assert len(data["records"]) == len(expected["records"])
|
||||
assert data["requiredMetricsCheck"]["missingMetrics"] == []
|
||||
# Full data present -> no warnings surfaced.
|
||||
assert get_all_warnings(response.json()) == []
|
||||
assert data["endTimeBeforeRetention"] is False
|
||||
assert {r["hostName"] for r in data["records"]} == set(exp_by_host.keys())
|
||||
|
||||
@@ -79,45 +80,109 @@ def test_hosts_accuracy(
|
||||
assert compare_values(record[field], exp[field], 1e-9), f"{record['hostName']}.{field}: got {record[field]}, expected {exp[field]}"
|
||||
|
||||
|
||||
def test_hosts_missing_metrics(
|
||||
@pytest.mark.parametrize(
|
||||
"case",
|
||||
[
|
||||
# Scenario 1: a required host metric was never ingested. Post-#11754 the
|
||||
# querier drops it instead of hard-erroring, so the endpoint returns 200
|
||||
# with the hosts that DO have data; the never-seen metric's column is the
|
||||
# -1 sentinel and a "has never been received" warning is surfaced.
|
||||
pytest.param(
|
||||
{
|
||||
"dataset": "hosts_missing_metrics.jsonl", # seeds only system.cpu.time
|
||||
"body": {"filter": {"expression": "host.name = 'miss-h1'"}},
|
||||
# singular form is "has", plural (>1 missing) is "have" -> match the common stem
|
||||
"warn_substrings": ["never been received"],
|
||||
"warn_names": [
|
||||
"system.memory.usage",
|
||||
"system.cpu.load_average.15m",
|
||||
"system.filesystem.usage",
|
||||
],
|
||||
# cpu/wait derive from the present system.cpu.time; the rest come
|
||||
# from never-seen metrics -> -1 sentinel.
|
||||
"data_fields": ["cpu", "wait"],
|
||||
"no_data_fields": ["memory", "load15", "diskUsage"],
|
||||
},
|
||||
id="metric_never_seen",
|
||||
),
|
||||
# Scenario 2: groupBy a key that IS in metadata (seen on some metrics) but
|
||||
# was never seen together with the host metrics. deployment.environment is
|
||||
# seeded on system.memory.usage + system.filesystem.usage only, so the key
|
||||
# resolves (the page-groups IN-filter parses -> no 400) but the cpu.time /
|
||||
# load_average metrics miss the (metric, key) pair and the statement builder
|
||||
# falls back to raw labels, surfacing "key `...` not found on metric ...".
|
||||
# Still 200, no hard error.
|
||||
pytest.param(
|
||||
{
|
||||
"dataset": "hosts_metric_key_pair.jsonl",
|
||||
"body": {
|
||||
"filter": {"expression": "host.name = 'kp-h1'"},
|
||||
"groupBy": [
|
||||
{
|
||||
"name": "deployment.environment",
|
||||
"fieldDataType": "string",
|
||||
"fieldContext": "resource",
|
||||
}
|
||||
],
|
||||
},
|
||||
"warn_substrings": ["key `deployment.environment` not found on metric"],
|
||||
"warn_names": [],
|
||||
"data_fields": [],
|
||||
"no_data_fields": [],
|
||||
},
|
||||
id="metric_key_pair_not_seen",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_hosts_warnings(
|
||||
signoz: types.SigNoz,
|
||||
create_user_admin: None, # pylint: disable=unused-argument
|
||||
get_token,
|
||||
insert_metrics,
|
||||
case: dict,
|
||||
) -> None:
|
||||
"""Seed only system.cpu.time; assert other 3 required metrics flagged missing."""
|
||||
"""Data-availability gaps surface as non-blocking warnings (200 + data),
|
||||
not hard errors. Covers never-seen metrics (scenario 1) and never-seen
|
||||
(metric, key) pairs via groupBy (scenario 2). Field-level expectations are
|
||||
provisional pending a real run."""
|
||||
now = datetime.now(tz=UTC).replace(microsecond=0)
|
||||
insert_metrics(
|
||||
Metrics.load_from_file(
|
||||
get_testdata_file_path("inframonitoring/hosts_missing_metrics.jsonl"),
|
||||
get_testdata_file_path(f"inframonitoring/{case['dataset']}"),
|
||||
base_time=now - timedelta(minutes=4),
|
||||
)
|
||||
)
|
||||
|
||||
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
|
||||
body: dict = {
|
||||
"start": int((now - timedelta(minutes=5)).timestamp() * 1000),
|
||||
"end": int(now.timestamp() * 1000),
|
||||
"limit": 50,
|
||||
}
|
||||
body.update(case["body"])
|
||||
|
||||
response = requests.post(
|
||||
signoz.self.host_configs["8080"].get(ENDPOINT),
|
||||
headers={"authorization": f"Bearer {token}"},
|
||||
json={
|
||||
"start": int((now - timedelta(minutes=5)).timestamp() * 1000),
|
||||
"end": int(now.timestamp() * 1000),
|
||||
"limit": 50,
|
||||
},
|
||||
json=body,
|
||||
timeout=5,
|
||||
)
|
||||
assert response.status_code == HTTPStatus.OK
|
||||
assert response.status_code == HTTPStatus.OK, response.text
|
||||
data = response.json()["data"]
|
||||
warnings = get_all_warnings(response.json())
|
||||
|
||||
assert set(data["requiredMetricsCheck"]["missingMetrics"]) == {
|
||||
"system.memory.usage",
|
||||
"system.cpu.load_average.15m",
|
||||
"system.filesystem.usage",
|
||||
}
|
||||
# Endpoint short-circuits when any required metric is missing:
|
||||
# records is empty and total=0 regardless of which hosts have partial data.
|
||||
# See pkg/modules/inframonitoring/implinframonitoring/module.go:84-89.
|
||||
assert data["records"] == []
|
||||
assert data["total"] == 0
|
||||
for substr in case["warn_substrings"]:
|
||||
assert any(substr in w["message"] for w in warnings), f"{substr!r} not surfaced: {warnings!r}"
|
||||
for name in case["warn_names"]:
|
||||
assert any(name in w["message"] for w in warnings), f"{name!r} not surfaced: {warnings!r}"
|
||||
|
||||
assert len(data["records"]) >= 1, f"expected at least one record: {data!r}"
|
||||
if case["data_fields"] or case["no_data_fields"]:
|
||||
record = data["records"][0]
|
||||
for field in case["data_fields"]:
|
||||
assert record[field] != -1, f"expected {field} populated, got {record[field]}"
|
||||
for field in case["no_data_fields"]:
|
||||
assert record[field] == -1, f"expected {field} == -1 sentinel, got {record[field]}"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
||||
@@ -614,7 +614,7 @@ def test_histogram_p90_returns_warning_outside_data_window(
|
||||
assert warnings[0]["message"].startswith(f"no data found for the metric {metric_name}")
|
||||
|
||||
|
||||
def test_non_existent_metrics_returns_404(
|
||||
def test_non_existent_metrics_returns_warning(
|
||||
signoz: types.SigNoz,
|
||||
create_user_admin: None, # pylint: disable=unused-argument
|
||||
get_token: Callable[[str, str], str],
|
||||
@@ -635,9 +635,11 @@ def test_non_existent_metrics_returns_404(
|
||||
|
||||
start_2h = int((now - timedelta(hours=2)).timestamp() * 1000)
|
||||
response = make_query_request(signoz, token, start_2h, end_ms, [query])
|
||||
assert response.status_code == HTTPStatus.NOT_FOUND
|
||||
assert response.status_code == HTTPStatus.OK
|
||||
|
||||
assert get_error_message(response.json()) == "could not find the metric whatevergoennnsgoeshere"
|
||||
data = response.json()
|
||||
warnings = get_all_warnings(data)
|
||||
assert any("whatevergoennnsgoeshere" in w["message"] and "has never been received" in w["message"] for w in warnings), f"expected never-seen metric warning, got: {warnings}"
|
||||
|
||||
|
||||
def test_non_existent_internal_metrics_returns_no_warning(
|
||||
|
||||
Reference in New Issue
Block a user