Compare commits

..

133 Commits

Author SHA1 Message Date
nikhilmantri0902
9203602abd chore: rename 2026-04-28 17:30:11 +05:30
Nikhil Mantri
c10d278ec4 Merge branch 'infraM/v2_nodes_list_api' into infraM/v2_namespaces_list_api 2026-04-28 17:12:27 +05:30
nikhilmantri0902
8b0e8f666e chore: v2 nodes api 2026-04-28 17:02:19 +05:30
nikhilmantri0902
1d89b03f10 chore: merged main and resolved conflicts 2026-04-28 16:38:57 +05:30
nikhilmantri0902
7d702763cc chore: namespaces code 2026-04-28 13:44:21 +05:30
nikhilmantri0902
3a61a78986 chore: merged base branch 2026-04-28 12:48:20 +05:30
Nikhil Mantri
46e833faba Merge branch 'main' into infraM/v2_pods_list_api 2026-04-28 12:15:56 +05:30
nikhilmantri0902
4bd7492629 chore: updated comment 2026-04-28 12:07:04 +05:30
nikhilmantri0902
401701e036 chore: metadata fix 2026-04-27 19:23:11 +05:30
nikhilmantri0902
3bec0df0ad chore: nodes list v2 full blown 2026-04-27 16:17:39 +05:30
Nikhil Mantri
24fe9a986d feat(infra-monitoring): v2 pods list apis - phase counts when custom grouping (#11088)
* chore: added phase counts feature

* chore: added queries for pod phase counts in custom group by

* chore: added unknown phase count

* fix: isPodUIDInGroupBy in buildPodRecords

* chore: 3 cte --> 2 cte

* chore: pod phase with local table of time series as counts

* chore: comment correction

* chore: corrected comment

* chore: value column for samples table added

* chore: removed query G for phase counts

* chore: rename variable

* chore: added PodPhaseNum constants to types
2026-04-27 16:04:55 +05:30
nikhilmantri0902
520e92049c Merge branch 'feat/v2_pods_list_api_phase_counts' of github.com:SigNoz/signoz into infraM/v2_nodes_list_api 2026-04-27 15:43:09 +05:30
Nikhil Mantri
92d297ac9d Merge branch 'main' into infraM/v2_pods_list_api 2026-04-27 15:05:04 +05:30
nikhilmantri0902
eff29aefba chore: added PodPhaseNum constants to types 2026-04-27 13:59:08 +05:30
nikhilmantri0902
ca73453c9e chore: rename variable 2026-04-27 13:53:17 +05:30
nikhilmantri0902
1d836d674f chore: removed query G for phase counts 2026-04-27 13:46:03 +05:30
nikhilmantri0902
83724b0cde chore: value column for samples table added 2026-04-27 13:09:46 +05:30
nikhilmantri0902
3050e37ec7 chore: corrected comment 2026-04-27 12:43:17 +05:30
Ashwin Bhatkal
bdbaa32485 Merge branch 'main' into infraM/v2_pods_list_api 2026-04-27 11:44:13 +05:30
nikhilmantri0902
55b2215025 chore: comment correction 2026-04-24 17:14:56 +05:30
nikhilmantri0902
e90378e618 Merge branch 'infraM/v2_pods_list_api' into feat/v2_pods_list_api_phase_counts 2026-04-24 16:47:15 +05:30
nikhilmantri0902
4592b78f48 chore: pod phase with local table of time series as counts 2026-04-24 15:27:38 +05:30
nikhilmantri0902
d81c99feae chore: 3 cte --> 2 cte 2026-04-24 14:58:26 +05:30
nikhilmantri0902
65a456ff9e fix: isPodUIDInGroupBy in buildPodRecords 2026-04-24 14:36:43 +05:30
nikhilmantri0902
264577b673 chore: added unknown phase count 2026-04-24 13:29:09 +05:30
Ashwin Bhatkal
b35c6676f9 fix: rebase fixes 2026-04-24 13:17:02 +05:30
nikhilmantri0902
c78c9a42db chore: merged base 2026-04-24 13:03:21 +05:30
nikhilmantri0902
1095caa123 chore: improved api description to document -1 as no data in numeric fields 2026-04-24 12:11:45 +05:30
nikhilmantri0902
9043b49762 chore: removed pods - order by phase 2026-04-24 12:04:51 +05:30
nikhilmantri0902
d4084a7494 chore: added support for pod phase unknown 2026-04-24 11:46:26 +05:30
nikhilmantri0902
27c564b3bf chore: added required tags 2026-04-24 11:21:20 +05:30
Nikhil Mantri
f02c491828 Merge branch 'main' into infraM/v2_pods_list_api 2026-04-24 10:47:13 +05:30
Nikhil Mantri
3d53b8f77f Merge branch 'main' into infraM/v2_pods_list_api 2026-04-23 18:44:33 +05:30
nikhilmantri0902
dffe94fec4 chore: conflicts resolved 2026-04-23 18:39:39 +05:30
nikhilmantri0902
b7d4f18aae chore: added queries for pod phase counts in custom group by 2026-04-23 18:01:26 +05:30
nikhilmantri0902
9ad2ec428a chore: added phase counts feature 2026-04-23 16:50:39 +05:30
nikhilmantri0902
c9360fcf13 Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-23 11:23:49 +05:30
nikhilmantri0902
b5ab45db20 chore: regen api client for inframonitoring
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 10:51:35 +05:30
Nikhil Mantri
08f76aca78 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-23 09:51:01 +05:30
nikhilmantri0902
983d4fe4f2 Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-22 15:37:21 +05:30
nikhilmantri0902
833af794c3 chore: make sort stable in case of tiebreaker by comparing composite group by keys 2026-04-22 15:26:28 +05:30
nikhilmantri0902
21b51d1fcc chore: cleanup and rename 2026-04-22 15:13:00 +05:30
nikhilmantri0902
56f22682c8 Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-22 14:29:17 +05:30
nikhilmantri0902
9c8359940c chore: remove a defensive nil map check, the function ensure non-nil map when err nil 2026-04-22 11:59:01 +05:30
Nikhil Mantri
4050880275 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-22 11:35:57 +05:30
nikhilmantri0902
5e775f64f2 chore: added status unauthorized 2026-04-21 21:30:44 +05:30
nikhilmantri0902
0189f23f46 chore: removed internal server error 2026-04-21 21:30:01 +05:30
nikhilmantri0902
49a36d4e3d chore: removed pod metric temporalities 2026-04-21 21:24:49 +05:30
nikhilmantri0902
9407d658ab chore: merge base hosts v2 branch 2026-04-21 21:17:28 +05:30
nikhilmantri0902
5035712485 chore: added json tag required: true 2026-04-21 18:50:25 +05:30
nikhilmantri0902
bab17c3615 chore: comments resolve 2026-04-21 18:33:56 +05:30
Nikhil Mantri
37b44f4db9 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-21 17:40:06 +05:30
nikhilmantri0902
99dd6e5f1e chore: pods code restructuring 2026-04-21 17:03:13 +05:30
nikhilmantri0902
9c7131fa6a chore: merge base branch 2026-04-21 16:22:55 +05:30
Nikhil Mantri
ad889a2e1d Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-21 13:48:53 +05:30
nikhilmantri0902
a4f6d0cbf5 chore: removed temporalities 2026-04-21 13:44:06 +05:30
nikhilmantri0902
589bed7c16 chore: comments correction 2026-04-21 12:50:51 +05:30
nikhilmantri0902
93843a1f48 chore: file structure further breakdown for clarity 2026-04-21 12:36:07 +05:30
nikhilmantri0902
88c43108fc chore: added types package 2026-04-20 18:52:43 +05:30
nikhilmantri0902
ed4cf540e8 chore: inframonitoring types renaming 2026-04-20 18:47:28 +05:30
nikhilmantri0902
9e2dfa9033 chore: rearrangement 2026-04-20 17:51:03 +05:30
nikhilmantri0902
d98d5d68ee chore: rename PodsList -> ListPods 2026-04-20 16:57:21 +05:30
nikhilmantri0902
2cb1c3b73b chore: rename HostsList -> ListHosts 2026-04-20 16:42:19 +05:30
nikhilmantri0902
ae7ca497ad chore: merged base hosts branch and reorganized code 2026-04-20 13:38:25 +05:30
Nikhil Mantri
a579916961 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-20 11:05:36 +05:30
Nikhil Mantri
4a16d56abf feat(infra-monitoring): v2 hosts list - return counts of active & inactive hosts for custom group by attributes (#10956)
* chore: add functionality for showing active and inactive counts in custom group by

* chore: bug fix

* chore: added subquery for active and total count

* chore: ignore empty string hosts in get active hosts

* fix: sinceUnixMilli for determining active hosts compute once per request

* chore: refactor code
2026-04-20 10:41:15 +05:30
Nikhil Mantri
642b5ac3f0 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-16 16:32:39 +05:30
Nikhil Mantri
a12112619c Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-16 15:41:35 +05:30
nikhilmantri0902
014785f1bc chore: ignore empty string hosts in get active hosts 2026-04-16 13:17:15 +05:30
Nikhil Mantri
58ee797b10 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-15 14:18:29 +05:30
Nikhil Mantri
82d236742f Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-15 11:21:33 +05:30
nikhilmantri0902
397e1ad5be chore: added TODOs and made filterByStatus a part of filter struct 2026-04-14 18:32:48 +05:30
nikhilmantri0902
8d6b25ca9b chore: resolved conflicts 2026-04-14 17:09:17 +05:30
nikhilmantri0902
5fa6bd8b8d Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-13 11:02:14 +05:30
nikhilmantri0902
bd9977483b chore: improved description 2026-04-11 11:31:35 +05:30
nikhilmantri0902
50fbdfeeef chore: validate order by to validate function 2026-04-10 19:01:45 +05:30
nikhilmantri0902
e2b1b73e87 chore: improvements 2026-04-10 13:23:33 +05:30
nikhilmantri0902
cb9f3fd3e5 chore: rearrage 2026-04-10 00:39:23 +05:30
nikhilmantri0902
232acc343d chore: escape backtick to prevent sql injection 2026-04-10 00:01:01 +05:30
nikhilmantri0902
2025afdccc chore: endpoint modification openapi 2026-04-09 23:25:59 +05:30
nikhilmantri0902
d2f4d4af93 chore: endpoint correction 2026-04-09 23:21:57 +05:30
Nikhil Mantri
47ff7bbb8e Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-09 23:20:39 +05:30
Nikhil Mantri
724071c5dc Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-09 18:30:15 +05:30
nikhilmantri0902
4d24979358 chore: frontend fix 2026-04-09 18:26:42 +05:30
nikhilmantri0902
042943b10a chore: distributed samples table to local table change for get metadata 2026-04-09 18:24:45 +05:30
nikhilmantri0902
48a9be7ec8 chore: added required metrics check 2026-04-09 17:38:48 +05:30
nikhilmantri0902
a9504b2120 chore: added a TODO remark 2026-04-09 16:08:34 +05:30
nikhilmantri0902
8755887c4a chore: added better metrics existence check 2026-04-09 16:01:35 +05:30
Nikhil Mantri
4cb4662b3a Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-09 15:14:25 +05:30
nikhilmantri0902
e6900dabc8 chore: warnings added passing from queryResponse warning to host lists response struct 2026-04-09 00:09:38 +05:30
nikhilmantri0902
c1ba389b63 chore: add type for response and files rearrange 2026-04-08 23:35:53 +05:30
nikhilmantri0902
3a1f40234f Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-08 23:03:50 +05:30
Nikhil Mantri
2e4891fa63 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-08 16:07:57 +05:30
Nikhil Mantri
04ebc0bec7 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-08 11:08:10 +05:30
nikhilmantri0902
271f9b81ed Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-07 21:55:47 +05:30
nikhilmantri0902
6fa815c294 chore: modified getMetadata query 2026-04-07 18:55:57 +05:30
nikhilmantri0902
63ec518efb chore: added hostName logic 2026-04-07 17:36:15 +05:30
nikhilmantri0902
c4ca20dd90 chore: return errors from getMetadata and lint fix 2026-04-07 17:01:13 +05:30
nikhilmantri0902
e56cc4222b chore: return errors from getMetadata and lint fix 2026-04-07 16:57:35 +05:30
nikhilmantri0902
07d2944d7c chore: yarn generate api 2026-04-07 16:44:06 +05:30
nikhilmantri0902
dea01ae36a chore: hostStatusNone added for clarity that this field can be left empty as well in payload 2026-04-07 16:32:25 +05:30
nikhilmantri0902
62ea5b54e2 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-07 14:09:48 +05:30
nikhilmantri0902
e549a7e42f chore: added pods list api updates 2026-04-07 13:58:10 +05:30
nikhilmantri0902
90e2ebb11f Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-07 13:51:35 +05:30
nikhilmantri0902
61baa1be7a chore: code improvements 2026-04-07 13:49:00 +05:30
nikhilmantri0902
b946fa665f Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-07 11:15:35 +05:30
nikhilmantri0902
2e049556e4 chore: unified composite key function 2026-04-07 11:15:03 +05:30
nikhilmantri0902
492a5e70d7 chore: added pods metrics temporality 2026-04-06 17:33:44 +05:30
nikhilmantri0902
ba1f2771e8 Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-06 17:18:44 +05:30
nikhilmantri0902
7458fb4855 Merge branch 'main' into infraM/v2_hosts_list_api 2026-04-06 17:18:01 +05:30
nikhilmantri0902
5f55f3938b chore: added temporalities of metrics 2026-04-06 17:17:15 +05:30
nikhilmantri0902
3e8102485c Merge branch 'infraM/v2_hosts_list_api' into infraM/v2_pods_list_api 2026-04-04 20:52:50 +05:30
nikhilmantri0902
861c682ea5 chore: nil pointer dereference fix in req.Filter 2026-04-04 20:52:08 +05:30
nikhilmantri0902
c8e5895dff chore: nil pointer check 2026-04-04 20:45:04 +05:30
nikhilmantri0902
82d72e7edb chore: pods api meta start time 2026-04-04 17:18:04 +05:30
nikhilmantri0902
a3f8ecaaf1 chore: merged base branch 2026-04-04 16:47:10 +05:30
nikhilmantri0902
19aada656c chore: updated spec 2026-04-04 16:44:15 +05:30
nikhilmantri0902
b21bb4280f chore: updated openapi yml 2026-04-04 16:38:22 +05:30
nikhilmantri0902
bc0a4fdb5c chore: added pods list logic 2026-04-04 13:24:46 +05:30
nikhilmantri0902
37fb0e9254 Merge branch 'infraM/base_dependencies' into infraM/v2_hosts_list_api 2026-04-03 17:49:00 +05:30
nikhilmantri0902
aecfa1a174 chore: added validation on order by 2026-04-02 20:13:30 +05:30
nikhilmantri0902
b869d23d94 chore: moved funcs 2026-04-02 20:02:22 +05:30
nikhilmantri0902
6ee3d44f76 chore: removed isSendingK8sAgentsMetricsCode 2026-04-02 19:58:30 +05:30
nikhilmantri0902
462e554107 chore: yarn generate api 2026-04-02 14:49:15 +05:30
nikhilmantri0902
66afa73e6f chore: return status as a string 2026-04-02 14:39:02 +05:30
nikhilmantri0902
54c604bcf4 chore: added some unit tests 2026-04-02 14:20:27 +05:30
nikhilmantri0902
c1be02ba54 chore: added validate function 2026-04-02 14:14:34 +05:30
nikhilmantri0902
d3c7ba8f45 chore: disk usage 2026-04-02 14:01:18 +05:30
nikhilmantri0902
039c4a0496 fix: bug fix 2026-04-02 11:32:49 +05:30
nikhilmantri0902
51a94b6bbc chore: added logic for hosts v3 api 2026-04-02 02:52:28 +05:30
nikhilmantri0902
bbfbb94f52 chore: merged main 2026-04-01 00:45:40 +05:30
nikhilmantri0902
d1eb9ef16f chore: endpoint detail update 2026-03-31 16:16:31 +05:30
nikhilmantri0902
3db00f8bc3 chore: baseline setup 2026-03-31 15:27:18 +05:30
37 changed files with 2572 additions and 439 deletions

View File

@@ -2474,6 +2474,132 @@ components:
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesNamespaceRecord:
properties:
failedPodCount:
type: integer
meta:
additionalProperties: {}
nullable: true
type: object
namespaceCPU:
format: double
type: number
namespaceMemory:
format: double
type: number
namespaceName:
type: string
pendingPodCount:
type: integer
runningPodCount:
type: integer
succeededPodCount:
type: integer
unknownPodCount:
type: integer
required:
- namespaceName
- namespaceCPU
- namespaceMemory
- pendingPodCount
- runningPodCount
- succeededPodCount
- failedPodCount
- unknownPodCount
- meta
type: object
InframonitoringtypesNamespaces:
properties:
endTimeBeforeRetention:
type: boolean
records:
items:
$ref: '#/components/schemas/InframonitoringtypesNamespaceRecord'
nullable: true
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
$ref: '#/components/schemas/InframonitoringtypesResponseType'
warning:
$ref: '#/components/schemas/Querybuildertypesv5QueryWarnData'
required:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesNodeCondition:
enum:
- ready
- not_ready
- ""
type: string
InframonitoringtypesNodeRecord:
properties:
condition:
$ref: '#/components/schemas/InframonitoringtypesNodeCondition'
meta:
additionalProperties: {}
nullable: true
type: object
nodeCPU:
format: double
type: number
nodeCPUAllocatable:
format: double
type: number
nodeMemory:
format: double
type: number
nodeMemoryAllocatable:
format: double
type: number
nodeName:
type: string
notReadyNodesCount:
type: integer
readyNodesCount:
type: integer
required:
- nodeName
- condition
- readyNodesCount
- notReadyNodesCount
- nodeCPU
- nodeCPUAllocatable
- nodeMemory
- nodeMemoryAllocatable
- meta
type: object
InframonitoringtypesNodes:
properties:
endTimeBeforeRetention:
type: boolean
records:
items:
$ref: '#/components/schemas/InframonitoringtypesNodeRecord'
nullable: true
type: array
requiredMetricsCheck:
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
total:
type: integer
type:
$ref: '#/components/schemas/InframonitoringtypesResponseType'
warning:
$ref: '#/components/schemas/Querybuildertypesv5QueryWarnData'
required:
- type
- records
- total
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesPodPhase:
enum:
- pending
@@ -2591,6 +2717,58 @@ components:
- end
- limit
type: object
InframonitoringtypesPostableNamespaces:
properties:
end:
format: int64
type: integer
filter:
$ref: '#/components/schemas/Querybuildertypesv5Filter'
groupBy:
items:
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
nullable: true
type: array
limit:
type: integer
offset:
type: integer
orderBy:
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
start:
format: int64
type: integer
required:
- start
- end
- limit
type: object
InframonitoringtypesPostableNodes:
properties:
end:
format: int64
type: integer
filter:
$ref: '#/components/schemas/Querybuildertypesv5Filter'
groupBy:
items:
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
nullable: true
type: array
limit:
type: integer
offset:
type: integer
orderBy:
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
start:
format: int64
type: integer
required:
- start
- end
- limit
type: object
InframonitoringtypesPostablePods:
properties:
end:
@@ -11059,6 +11237,145 @@ paths:
summary: List Hosts for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/namespaces:
post:
deprecated: false
description: 'Returns a paginated list of Kubernetes namespaces with key aggregated
pod metrics: CPU usage and memory working set (summed across pods in the group),
plus per-group pod counts bucketed by each pod''s latest k8s.pod.phase value
in the window (pendingPodCount, runningPodCount, succeededPodCount, failedPodCount,
unknownPodCount). Each namespace includes metadata attributes (k8s.namespace.name,
k8s.cluster.name). The response type is ''list'' for the default k8s.namespace.name
grouping or ''grouped_list'' for custom groupBy keys; in both modes every
row aggregates pods in the group. Supports filtering via a filter expression,
custom groupBy, ordering by cpu / memory, and pagination via offset/limit.
Also reports missing required metrics and whether the requested time range
falls before the data retention boundary. Numeric metric fields (namespaceCPU,
namespaceMemory) return -1 as a sentinel when no data is available for that
field.'
operationId: ListNamespaces
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InframonitoringtypesPostableNamespaces'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/InframonitoringtypesNamespaces'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: List Namespaces for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/nodes:
post:
deprecated: false
description: 'Returns a paginated list of Kubernetes nodes with key metrics:
CPU usage, CPU allocatable, memory working set, memory allocatable, and per-group
readyNodesCount / notReadyNodesCount derived from each node''s latest k8s.node.condition_ready
value in the window. Each node includes metadata attributes (k8s.node.uid,
k8s.cluster.name). The response type is ''list'' for the default k8s.node.name
grouping (each row is one node with its current condition string: ready /
not_ready / '''') or ''grouped_list'' for custom groupBy keys (each row aggregates
nodes in the group with readyNodesCount and notReadyNodesCount; condition
stays empty). Supports filtering via a filter expression, custom groupBy,
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
via offset/limit. Also reports missing required metrics and whether the requested
time range falls before the data retention boundary. Numeric metric fields
(nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1
as a sentinel when no data is available for that field.'
operationId: ListNodes
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InframonitoringtypesPostableNodes'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/InframonitoringtypesNodes'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: List Nodes for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/pods:
post:
deprecated: false

View File

@@ -13,6 +13,7 @@ import (
"github.com/SigNoz/signoz/pkg/errors"
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) {
@@ -48,7 +49,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, tr)
// create ch rule task for evaluation
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
@@ -72,7 +73,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, pr)
// create promql rule task for evaluation
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeAnomaly {
// create anomaly rule
@@ -95,7 +96,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
rules = append(rules, ar)
// create anomaly rule task for evaluation
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
} else {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
@@ -209,9 +210,9 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, error) {
}
// newTask returns an appropriate group for the rule type
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc) baserules.Task {
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) baserules.Task {
if taskType == baserules.TaskTypeCh {
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify)
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
}
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify)
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
}

View File

@@ -13,8 +13,12 @@ import type {
import type {
InframonitoringtypesPostableHostsDTO,
InframonitoringtypesPostableNamespacesDTO,
InframonitoringtypesPostableNodesDTO,
InframonitoringtypesPostablePodsDTO,
ListHosts200,
ListNamespaces200,
ListNodes200,
ListPods200,
RenderErrorResponseDTO,
} from '../sigNoz.schemas';
@@ -106,6 +110,174 @@ export const useListHosts = <
return useMutation(mutationOptions);
};
/**
* Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group pod counts bucketed by each pod's latest k8s.pod.phase value in the window (pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.
* @summary List Namespaces for Infra Monitoring
*/
export const listNamespaces = (
inframonitoringtypesPostableNamespacesDTO: BodyType<InframonitoringtypesPostableNamespacesDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<ListNamespaces200>({
url: `/api/v2/infra_monitoring/namespaces`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: inframonitoringtypesPostableNamespacesDTO,
signal,
});
};
export const getListNamespacesMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listNamespaces>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNamespacesDTO> },
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof listNamespaces>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNamespacesDTO> },
TContext
> => {
const mutationKey = ['listNamespaces'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof listNamespaces>>,
{ data: BodyType<InframonitoringtypesPostableNamespacesDTO> }
> = (props) => {
const { data } = props ?? {};
return listNamespaces(data);
};
return { mutationFn, ...mutationOptions };
};
export type ListNamespacesMutationResult = NonNullable<
Awaited<ReturnType<typeof listNamespaces>>
>;
export type ListNamespacesMutationBody =
BodyType<InframonitoringtypesPostableNamespacesDTO>;
export type ListNamespacesMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary List Namespaces for Infra Monitoring
*/
export const useListNamespaces = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listNamespaces>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNamespacesDTO> },
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof listNamespaces>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNamespacesDTO> },
TContext
> => {
const mutationOptions = getListNamespacesMutationOptions(options);
return useMutation(mutationOptions);
};
/**
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, and per-group readyNodesCount / notReadyNodesCount derived from each node's latest k8s.node.condition_ready value in the window. Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / '') or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group with readyNodesCount and notReadyNodesCount; condition stays empty). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
* @summary List Nodes for Infra Monitoring
*/
export const listNodes = (
inframonitoringtypesPostableNodesDTO: BodyType<InframonitoringtypesPostableNodesDTO>,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<ListNodes200>({
url: `/api/v2/infra_monitoring/nodes`,
method: 'POST',
headers: { 'Content-Type': 'application/json' },
data: inframonitoringtypesPostableNodesDTO,
signal,
});
};
export const getListNodesMutationOptions = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listNodes>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
TContext
>;
}): UseMutationOptions<
Awaited<ReturnType<typeof listNodes>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
TContext
> => {
const mutationKey = ['listNodes'];
const { mutation: mutationOptions } = options
? options.mutation &&
'mutationKey' in options.mutation &&
options.mutation.mutationKey
? options
: { ...options, mutation: { ...options.mutation, mutationKey } }
: { mutation: { mutationKey } };
const mutationFn: MutationFunction<
Awaited<ReturnType<typeof listNodes>>,
{ data: BodyType<InframonitoringtypesPostableNodesDTO> }
> = (props) => {
const { data } = props ?? {};
return listNodes(data);
};
return { mutationFn, ...mutationOptions };
};
export type ListNodesMutationResult = NonNullable<
Awaited<ReturnType<typeof listNodes>>
>;
export type ListNodesMutationBody =
BodyType<InframonitoringtypesPostableNodesDTO>;
export type ListNodesMutationError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary List Nodes for Infra Monitoring
*/
export const useListNodes = <
TError = ErrorType<RenderErrorResponseDTO>,
TContext = unknown,
>(options?: {
mutation?: UseMutationOptions<
Awaited<ReturnType<typeof listNodes>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
TContext
>;
}): UseMutationResult<
Awaited<ReturnType<typeof listNodes>>,
TError,
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
TContext
> => {
const mutationOptions = getListNodesMutationOptions(options);
return useMutation(mutationOptions);
};
/**
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts: pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
* @summary List Pods for Infra Monitoring

View File

@@ -3243,6 +3243,146 @@ export interface InframonitoringtypesHostsDTO {
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
/**
* @nullable
*/
export type InframonitoringtypesNamespaceRecordDTOMeta = {
[key: string]: unknown;
} | null;
export interface InframonitoringtypesNamespaceRecordDTO {
/**
* @type integer
*/
failedPodCount: number;
/**
* @type object
* @nullable true
*/
meta: InframonitoringtypesNamespaceRecordDTOMeta;
/**
* @type number
* @format double
*/
namespaceCPU: number;
/**
* @type number
* @format double
*/
namespaceMemory: number;
/**
* @type string
*/
namespaceName: string;
/**
* @type integer
*/
pendingPodCount: number;
/**
* @type integer
*/
runningPodCount: number;
/**
* @type integer
*/
succeededPodCount: number;
/**
* @type integer
*/
unknownPodCount: number;
}
export interface InframonitoringtypesNamespacesDTO {
/**
* @type boolean
*/
endTimeBeforeRetention: boolean;
/**
* @type array
* @nullable true
*/
records: InframonitoringtypesNamespaceRecordDTO[] | null;
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
total: number;
type: InframonitoringtypesResponseTypeDTO;
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export enum InframonitoringtypesNodeConditionDTO {
ready = 'ready',
not_ready = 'not_ready',
'' = '',
}
/**
* @nullable
*/
export type InframonitoringtypesNodeRecordDTOMeta = {
[key: string]: unknown;
} | null;
export interface InframonitoringtypesNodeRecordDTO {
condition: InframonitoringtypesNodeConditionDTO;
/**
* @type object
* @nullable true
*/
meta: InframonitoringtypesNodeRecordDTOMeta;
/**
* @type number
* @format double
*/
nodeCPU: number;
/**
* @type number
* @format double
*/
nodeCPUAllocatable: number;
/**
* @type number
* @format double
*/
nodeMemory: number;
/**
* @type number
* @format double
*/
nodeMemoryAllocatable: number;
/**
* @type string
*/
nodeName: string;
/**
* @type integer
*/
notReadyNodesCount: number;
/**
* @type integer
*/
readyNodesCount: number;
}
export interface InframonitoringtypesNodesDTO {
/**
* @type boolean
*/
endTimeBeforeRetention: boolean;
/**
* @type array
* @nullable true
*/
records: InframonitoringtypesNodeRecordDTO[] | null;
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
/**
* @type integer
*/
total: number;
type: InframonitoringtypesResponseTypeDTO;
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export enum InframonitoringtypesPodPhaseDTO {
pending = 'pending',
running = 'running',
@@ -3373,6 +3513,62 @@ export interface InframonitoringtypesPostableHostsDTO {
start: number;
}
export interface InframonitoringtypesPostableNamespacesDTO {
/**
* @type integer
* @format int64
*/
end: number;
filter?: Querybuildertypesv5FilterDTO;
/**
* @type array
* @nullable true
*/
groupBy?: Querybuildertypesv5GroupByKeyDTO[] | null;
/**
* @type integer
*/
limit: number;
/**
* @type integer
*/
offset?: number;
orderBy?: Querybuildertypesv5OrderByDTO;
/**
* @type integer
* @format int64
*/
start: number;
}
export interface InframonitoringtypesPostableNodesDTO {
/**
* @type integer
* @format int64
*/
end: number;
filter?: Querybuildertypesv5FilterDTO;
/**
* @type array
* @nullable true
*/
groupBy?: Querybuildertypesv5GroupByKeyDTO[] | null;
/**
* @type integer
*/
limit: number;
/**
* @type integer
*/
offset?: number;
orderBy?: Querybuildertypesv5OrderByDTO;
/**
* @type integer
* @format int64
*/
start: number;
}
export interface InframonitoringtypesPostablePodsDTO {
/**
* @type integer
@@ -7480,6 +7676,22 @@ export type ListHosts200 = {
status: string;
};
export type ListNamespaces200 = {
data: InframonitoringtypesNamespacesDTO;
/**
* @type string
*/
status: string;
};
export type ListNodes200 = {
data: InframonitoringtypesNodesDTO;
/**
* @type string
*/
status: string;
};
export type ListPods200 = {
data: InframonitoringtypesPodsDTO;
/**

View File

@@ -1,94 +0,0 @@
package alertmanagerserver
import (
"context"
"log/slog"
"sync"
"time"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/common/model"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
)
// MaintenanceMuter implements types.Muter for maintenance windows.
// It suppresses alerts whose ruleId label matches an active maintenance schedule.
// Results are cached for cacheTTL to avoid a DB query on every per-alert check.
type MaintenanceMuter struct {
maintenanceStore ruletypes.MaintenanceStore
orgID string
logger *slog.Logger
mu sync.RWMutex
cached []*ruletypes.PlannedMaintenance
cacheExpiry time.Time
}
const maintenanceCacheTTL = 30 * time.Second
func NewMaintenanceMuter(store ruletypes.MaintenanceStore, orgID string, logger *slog.Logger) *MaintenanceMuter {
return &MaintenanceMuter{
maintenanceStore: store,
orgID: orgID,
logger: logger,
}
}
func (m *MaintenanceMuter) Mutes(ctx context.Context, lset model.LabelSet) bool {
ruleID := string(lset[ruletypes.AlertRuleIDLabel])
if ruleID == "" {
return false
}
now := time.Now()
for _, mw := range m.getMaintenances(ctx) {
if mw.ShouldSkip(ruleID, now) {
return true
}
}
return false
}
func (m *MaintenanceMuter) getMaintenances(ctx context.Context) []*ruletypes.PlannedMaintenance {
m.mu.RLock()
if time.Now().Before(m.cacheExpiry) {
cached := m.cached
m.mu.RUnlock()
return cached
}
m.mu.RUnlock()
m.mu.Lock()
defer m.mu.Unlock()
// Double-check after acquiring write lock.
if time.Now().Before(m.cacheExpiry) {
return m.cached
}
mws, err := m.maintenanceStore.ListPlannedMaintenance(ctx, m.orgID)
if err != nil {
m.logger.ErrorContext(ctx, "failed to list planned maintenance windows; alerts will not be suppressed", slog.String("org_id", m.orgID))
return m.cached // return stale (potentially empty) cache on error
}
m.cached = mws
m.cacheExpiry = time.Now().Add(maintenanceCacheTTL)
return m.cached
}
// maintenanceMuteStage wraps MaintenanceMuter as a notify.Stage.
// We implement the stage directly rather than using notify.NewMuteStage to avoid
// a dependency on the unexported *notify.Metrics field of PipelineBuilder.
type maintenanceMuteStage struct {
muter *MaintenanceMuter
}
func (s *maintenanceMuteStage) Exec(ctx context.Context, _ *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
filtered := make([]*types.Alert, 0, len(alerts))
for _, a := range alerts {
if !s.muter.Mutes(ctx, a.Labels) {
filtered = append(filtered, a)
}
}
return ctx, filtered, nil
}

View File

@@ -1,120 +0,0 @@
// Copyright (c) 2026 SigNoz, Inc.
// Copyright 2015 Prometheus Team
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package alertmanagerserver
// pipelineBuilder is a local copy of notify.PipelineBuilder that injects
// the maintenance mute stage immediately before the receiver stage.
//
// We maintain our own copy so we can control exactly where in the pipeline
// the maintenance stage runs (between the silence stage and the receiver),
// which is not possible by wrapping the output of the upstream builder.
//
// Upstream pipeline order (notify.PipelineBuilder.New, notify.go:444):
//
// GossipSettle → Inhibit → TimeActive → TimeMute → Silence → [mms] → Receiver
import (
"time"
"github.com/prometheus/alertmanager/featurecontrol"
"github.com/prometheus/alertmanager/inhibit"
"github.com/prometheus/alertmanager/nflog/nflogpb"
"github.com/prometheus/alertmanager/notify"
"github.com/prometheus/alertmanager/silence"
"github.com/prometheus/alertmanager/timeinterval"
"github.com/prometheus/alertmanager/types"
"github.com/prometheus/client_golang/prometheus"
)
type pipelineBuilder struct {
metrics *notify.Metrics
ff featurecontrol.Flagger
muter *MaintenanceMuter
}
func newPipelineBuilder(
r prometheus.Registerer,
ff featurecontrol.Flagger,
muter *MaintenanceMuter,
) *pipelineBuilder {
return &pipelineBuilder{
metrics: notify.NewMetrics(r, ff),
ff: ff,
muter: muter,
}
}
// New returns a map of receivers to Stages, mirroring notify.PipelineBuilder.New
// but inserting a maintenanceMuteStage between the silence stage and the receiver.
func (pb *pipelineBuilder) New(
receivers map[string][]notify.Integration,
wait func() time.Duration,
inhibitor *inhibit.Inhibitor,
silencer *silence.Silencer,
intervener *timeinterval.Intervener,
marker types.GroupMarker,
notificationLog notify.NotificationLog,
peer notify.Peer,
) notify.RoutingStage {
rs := make(notify.RoutingStage, len(receivers))
ms := notify.NewGossipSettleStage(peer)
is := notify.NewMuteStage(inhibitor, pb.metrics)
tas := notify.NewTimeActiveStage(intervener, marker, pb.metrics)
tms := notify.NewTimeMuteStage(intervener, marker, pb.metrics)
ss := notify.NewMuteStage(silencer, pb.metrics)
var mms *maintenanceMuteStage
if pb.muter != nil {
mms = &maintenanceMuteStage{muter: pb.muter}
}
for name := range receivers {
stages := notify.MultiStage{ms, is, tas, tms, ss}
if mms != nil {
stages = append(stages, mms)
}
stages = append(stages, createReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics))
rs[name] = stages
}
pb.metrics.InitializeFor(receivers)
return rs
}
// createReceiverStage is a copy of notify.createReceiverStage (unexported upstream).
func createReceiverStage(
name string,
integrations []notify.Integration,
wait func() time.Duration,
notificationLog notify.NotificationLog,
metrics *notify.Metrics,
) notify.Stage {
var fs notify.FanoutStage
for i := range integrations {
recv := &nflogpb.Receiver{
GroupName: name,
Integration: integrations[i].Name(),
Idx: uint32(integrations[i].Index()),
}
var s notify.MultiStage
s = append(s, notify.NewWaitStage(wait))
s = append(s, notify.NewDedupStage(&integrations[i], notificationLog, recv))
s = append(s, notify.NewRetryStage(integrations[i], name, metrics))
s = append(s, notify.NewSetNotifiesStage(notificationLog, recv))
fs = append(fs, s)
}
return fs
}

View File

@@ -26,13 +26,14 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
)
// This is not a real snapshot file and will never be used. We need this placeholder to ensure maintenance runs on shutdown.
// See https://github.com/prometheus/alertmanager/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
// and https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
var snapfnoop string = "snapfnoop"
var (
// This is not a real file and will never be used. We need this placeholder to ensure maintenance runs on shutdown. See
// https://github.com/prometheus/server/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
// and https://github.com/prometheus/server/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
snapfnoop string = "snapfnoop"
)
type Server struct {
// logger is the logger for the alertmanager
@@ -62,7 +63,7 @@ type Server struct {
silencer *silence.Silencer
silences *silence.Silences
timeIntervals map[string][]timeinterval.TimeInterval
pipelineBuilder *pipelineBuilder
pipelineBuilder *notify.PipelineBuilder
marker *alertmanagertypes.MemMarker
tmpl *template.Template
wg sync.WaitGroup
@@ -70,16 +71,7 @@ type Server struct {
notificationManager nfmanager.NotificationManager
}
func New(
ctx context.Context,
logger *slog.Logger,
registry prometheus.Registerer,
srvConfig Config,
orgID string,
stateStore alertmanagertypes.StateStore,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) (*Server, error) {
func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registerer, srvConfig Config, orgID string, stateStore alertmanagertypes.StateStore, nfManager nfmanager.NotificationManager) (*Server, error) {
server := &Server{
logger: logger.With(slog.String("pkg", "go.signoz.io/pkg/alertmanager/alertmanagerserver")),
registry: registry,
@@ -168,6 +160,7 @@ func New(
return c, server.stateStore.Set(ctx, storableSilences)
})
}()
// Start maintenance for notification logs
@@ -203,11 +196,7 @@ func New(
return nil, err
}
var muter *MaintenanceMuter
if maintenanceStore != nil {
muter = NewMaintenanceMuter(maintenanceStore, orgID, server.logger)
}
server.pipelineBuilder = newPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{}, muter)
server.pipelineBuilder = notify.NewPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{})
server.dispatcherMetrics = NewDispatcherMetrics(false, signozRegisterer)
return server, nil

View File

@@ -90,7 +90,7 @@ func TestEndToEndAlertManagerFlow(t *testing.T) {
stateStore := alertmanagertypestest.NewStateStore()
registry := prometheus.NewRegistry()
logger := slog.New(slog.DiscardHandler)
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager, nil)
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
require.NoError(t, err)

View File

@@ -25,7 +25,7 @@ import (
func TestServerSetConfigAndStop(t *testing.T) {
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
@@ -37,7 +37,7 @@ func TestServerSetConfigAndStop(t *testing.T) {
func TestServerTestReceiverTypeWebhook(t *testing.T) {
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
@@ -85,7 +85,7 @@ func TestServerPutAlerts(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
@@ -133,7 +133,7 @@ func TestServerTestAlert(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
@@ -238,7 +238,7 @@ func TestServerTestAlertContinuesOnFailure(t *testing.T) {
srvCfg := NewConfig()
srvCfg.Route.GroupInterval = 1 * time.Second
notificationManager := nfmanagertest.NewMock()
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
require.NoError(t, err)
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")

View File

@@ -14,7 +14,6 @@ import (
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
)
type Service struct {
@@ -40,18 +39,16 @@ type Service struct {
serversMtx sync.RWMutex
notificationManager nfmanager.NotificationManager
maintenanceStore ruletypes.MaintenanceStore
}
func New(
ctx context.Context,
settings factory.ScopedProviderSettings,
config alertmanagerserver.Config,
stateStore alertmanagertypes.StateStore,
configStore alertmanagertypes.ConfigStore,
orgGetter organization.Getter,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) *Service {
service := &Service{
config: config,
@@ -62,7 +59,6 @@ func New(
servers: make(map[string]*alertmanagerserver.Server),
serversMtx: sync.RWMutex{},
notificationManager: nfManager,
maintenanceStore: maintenanceStore,
}
return service
@@ -181,10 +177,7 @@ func (service *Service) newServer(ctx context.Context, orgID string) (*alertmana
return nil, err
}
server, err := alertmanagerserver.New(
ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID,
service.stateStore, service.notificationManager, service.maintenanceStore,
)
server, err := alertmanagerserver.New(ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID, service.stateStore, service.notificationManager)
if err != nil {
return nil, err
}

View File

@@ -20,7 +20,6 @@ import (
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/authtypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -31,49 +30,35 @@ type provider struct {
configStore alertmanagertypes.ConfigStore
stateStore alertmanagertypes.StateStore
notificationManager nfmanager.NotificationManager
maintenanceStore ruletypes.MaintenanceStore
stopC chan struct{}
}
func NewFactory(
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
notificationManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
func NewFactory(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
return factory.NewProviderFactory(factory.MustNewName("signoz"), func(ctx context.Context, settings factory.ProviderSettings, config alertmanager.Config) (alertmanager.Alertmanager, error) {
return New(settings, config, sqlstore, orgGetter, notificationManager, maintenanceStore)
return New(ctx, settings, config, sqlstore, orgGetter, notificationManager)
})
}
func New(
providerSettings factory.ProviderSettings,
config alertmanager.Config,
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
notificationManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) (*provider, error) {
func New(ctx context.Context, providerSettings factory.ProviderSettings, config alertmanager.Config, sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) (*provider, error) {
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager")
configStore := sqlalertmanagerstore.NewConfigStore(sqlstore)
stateStore := sqlalertmanagerstore.NewStateStore(sqlstore)
p := &provider{
service: alertmanager.New(
ctx,
settings,
config.Signoz.Config,
stateStore,
configStore,
orgGetter,
notificationManager,
maintenanceStore,
),
settings: settings,
config: config,
configStore: configStore,
stateStore: stateStore,
notificationManager: notificationManager,
maintenanceStore: maintenanceStore,
stopC: make(chan struct{}),
}

View File

@@ -48,5 +48,43 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v2/infra_monitoring/nodes", handler.New(
provider.authZ.ViewAccess(provider.infraMonitoringHandler.ListNodes),
handler.OpenAPIDef{
ID: "ListNodes",
Tags: []string{"inframonitoring"},
Summary: "List Nodes for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, and per-group readyNodesCount / notReadyNodesCount derived from each node's latest k8s.node.condition_ready value in the window. Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / '') or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group with readyNodesCount and notReadyNodesCount; condition stays empty). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableNodes),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Nodes),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
Deprecated: false,
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodPost).GetError(); err != nil {
return err
}
if err := router.Handle("/api/v2/infra_monitoring/namespaces", handler.New(
provider.authZ.ViewAccess(provider.infraMonitoringHandler.ListNamespaces),
handler.OpenAPIDef{
ID: "ListNamespaces",
Tags: []string{"inframonitoring"},
Summary: "List Namespaces for Infra Monitoring",
Description: "Returns a paginated list of Kubernetes namespaces with key aggregated pod metrics: CPU usage and memory working set (summed across pods in the group), plus per-group pod counts bucketed by each pod's latest k8s.pod.phase value in the window (pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount). Each namespace includes metadata attributes (k8s.namespace.name, k8s.cluster.name). The response type is 'list' for the default k8s.namespace.name grouping or 'grouped_list' for custom groupBy keys; in both modes every row aggregates pods in the group. Supports filtering via a filter expression, custom groupBy, ordering by cpu / memory, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (namespaceCPU, namespaceMemory) return -1 as a sentinel when no data is available for that field.",
Request: new(inframonitoringtypes.PostableNamespaces),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.Namespaces),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
Deprecated: false,
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodPost).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -69,3 +69,51 @@ func (h *handler) ListPods(rw http.ResponseWriter, req *http.Request) {
render.Success(rw, http.StatusOK, result)
}
func (h *handler) ListNodes(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {
render.Error(rw, err)
return
}
orgID := valuer.MustNewUUID(claims.OrgID)
var parsedReq inframonitoringtypes.PostableNodes
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.ListNodes(req.Context(), orgID, &parsedReq)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}
func (h *handler) ListNamespaces(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {
render.Error(rw, err)
return
}
orgID := valuer.MustNewUUID(claims.OrgID)
var parsedReq inframonitoringtypes.PostableNamespaces
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.ListNamespaces(req.Context(), orgID, &parsedReq)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}

View File

@@ -23,3 +23,9 @@ type podPhaseCounts struct {
Failed int
Unknown int
}
// nodeConditionCounts holds per-group node counts bucketed by latest condition_ready in window.
type nodeConditionCounts struct {
Ready int
NotReady int
}

View File

@@ -242,3 +242,175 @@ func (m *module) ListPods(ctx context.Context, orgID valuer.UUID, req *inframoni
return resp, nil
}
func (m *module) ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error) {
if err := req.Validate(); err != nil {
return nil, err
}
resp := &inframonitoringtypes.Nodes{}
if req.OrderBy == nil {
req.OrderBy = &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: inframonitoringtypes.NodesOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionDesc,
}
}
if len(req.GroupBy) == 0 {
req.GroupBy = []qbtypes.GroupByKey{nodeNameGroupByKey}
resp.Type = inframonitoringtypes.ResponseTypeList
} else {
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, nodesTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.NodeRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.NodeRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getNodesTableMetadata(ctx, req)
if err != nil {
return nil, err
}
resp.Total = len(metadataMap)
pageGroups, err := m.getTopNodeGroups(ctx, orgID, req, metadataMap)
if err != nil {
return nil, err
}
if len(pageGroups) == 0 {
resp.Records = []inframonitoringtypes.NodeRecord{}
return resp, nil
}
filterExpr := ""
if req.Filter != nil {
filterExpr = req.Filter.Expression
}
fullQueryReq := buildFullQueryRequest(req.Start, req.End, filterExpr, req.GroupBy, pageGroups, m.newNodesTableListQuery())
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
if err != nil {
return nil, err
}
conditionCounts, err := m.getPerGroupNodeConditionCounts(ctx, req, pageGroups)
if err != nil {
return nil, err
}
isNodeNameInGroupBy := isKeyInGroupByAttrs(req.GroupBy, nodeNameAttrKey)
resp.Records = buildNodeRecords(isNodeNameInGroupBy, queryResp, pageGroups, req.GroupBy, metadataMap, conditionCounts)
resp.Warning = queryResp.Warning
return resp, nil
}
func (m *module) ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNamespaces) (*inframonitoringtypes.Namespaces, error) {
if err := req.Validate(); err != nil {
return nil, err
}
resp := &inframonitoringtypes.Namespaces{}
if req.OrderBy == nil {
req.OrderBy = &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: inframonitoringtypes.NamespacesOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionDesc,
}
}
if len(req.GroupBy) == 0 {
req.GroupBy = []qbtypes.GroupByKey{namespaceNameGroupByKey}
resp.Type = inframonitoringtypes.ResponseTypeList
} else {
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
}
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, namespacesTableMetricNamesList)
if err != nil {
return nil, err
}
if len(missingMetrics) > 0 {
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
resp.Records = []inframonitoringtypes.NamespaceRecord{}
resp.Total = 0
return resp, nil
}
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.NamespaceRecord{}
resp.Total = 0
return resp, nil
}
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
metadataMap, err := m.getNamespacesTableMetadata(ctx, req)
if err != nil {
return nil, err
}
resp.Total = len(metadataMap)
pageGroups, err := m.getTopNamespaceGroups(ctx, orgID, req, metadataMap)
if err != nil {
return nil, err
}
if len(pageGroups) == 0 {
resp.Records = []inframonitoringtypes.NamespaceRecord{}
return resp, nil
}
filterExpr := ""
if req.Filter != nil {
filterExpr = req.Filter.Expression
}
fullQueryReq := buildFullQueryRequest(req.Start, req.End, filterExpr, req.GroupBy, pageGroups, m.newNamespacesTableListQuery())
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
if err != nil {
return nil, err
}
// Reuse the pods phase-counts CTE function via a temp struct — it reads only
// Start/End/Filter/GroupBy from PostablePods.
phaseCounts, err := m.getPerGroupPodPhaseCounts(ctx, &inframonitoringtypes.PostablePods{
Start: req.Start,
End: req.End,
Filter: req.Filter,
GroupBy: req.GroupBy,
}, pageGroups)
if err != nil {
return nil, err
}
resp.Records = buildNamespaceRecords(queryResp, pageGroups, req.GroupBy, metadataMap, phaseCounts)
resp.Warning = queryResp.Warning
return resp, nil
}

View File

@@ -0,0 +1,121 @@
package implinframonitoring
import (
"context"
"slices"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/valuer"
)
// buildNamespaceRecords assembles the page records. Pod phase counts come from
// phaseCounts in both modes; every row is a group of pods, so there's no
// per-row "current phase" concept (unlike pods/nodes list mode).
func buildNamespaceRecords(
resp *qbtypes.QueryRangeResponse,
pageGroups []map[string]string,
groupBy []qbtypes.GroupByKey,
metadataMap map[string]map[string]string,
phaseCounts map[string]podPhaseCounts,
) []inframonitoringtypes.NamespaceRecord {
metricsMap := parseFullQueryResponse(resp, groupBy)
records := make([]inframonitoringtypes.NamespaceRecord, 0, len(pageGroups))
for _, labels := range pageGroups {
compositeKey := compositeKeyFromLabels(labels, groupBy)
namespaceName := labels[namespaceNameAttrKey]
record := inframonitoringtypes.NamespaceRecord{ // initialize with default values
NamespaceName: namespaceName,
NamespaceCPU: -1,
NamespaceMemory: -1,
Meta: map[string]any{},
}
if metrics, ok := metricsMap[compositeKey]; ok {
if v, exists := metrics["A"]; exists {
record.NamespaceCPU = v
}
if v, exists := metrics["D"]; exists {
record.NamespaceMemory = v
}
}
if phaseCountsForGroup, ok := phaseCounts[compositeKey]; ok {
record.PendingPodCount = phaseCountsForGroup.Pending
record.RunningPodCount = phaseCountsForGroup.Running
record.SucceededPodCount = phaseCountsForGroup.Succeeded
record.FailedPodCount = phaseCountsForGroup.Failed
record.UnknownPodCount = phaseCountsForGroup.Unknown
}
if attrs, ok := metadataMap[compositeKey]; ok {
for k, v := range attrs {
record.Meta[k] = v
}
}
records = append(records, record)
}
return records
}
func (m *module) getTopNamespaceGroups(
ctx context.Context,
orgID valuer.UUID,
req *inframonitoringtypes.PostableNamespaces,
metadataMap map[string]map[string]string,
) ([]map[string]string, error) {
orderByKey := req.OrderBy.Key.Name
queryNamesForOrderBy := orderByToNamespacesQueryNames[orderByKey]
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
topReq := &qbtypes.QueryRangeRequest{
Start: uint64(req.Start),
End: uint64(req.End),
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
},
}
for _, envelope := range m.newNamespacesTableListQuery().CompositeQuery.Queries {
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
continue
}
copied := envelope
if copied.Type == qbtypes.QueryTypeBuilder {
existingExpr := ""
if f := copied.GetFilter(); f != nil {
existingExpr = f.Expression
}
reqFilterExpr := ""
if req.Filter != nil {
reqFilterExpr = req.Filter.Expression
}
merged := mergeFilterExpressions(existingExpr, reqFilterExpr)
copied.SetFilter(&qbtypes.Filter{Expression: merged})
copied.SetGroupBy(req.GroupBy)
}
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
}
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
if err != nil {
return nil, err
}
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
}
func (m *module) getNamespacesTableMetadata(ctx context.Context, req *inframonitoringtypes.PostableNamespaces) (map[string]map[string]string, error) {
var nonGroupByAttrs []string
for _, key := range namespaceAttrKeysForMetadata {
if !isKeyInGroupByAttrs(req.GroupBy, key) {
nonGroupByAttrs = append(nonGroupByAttrs, key)
}
}
return m.getMetadata(ctx, namespacesTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
}

View File

@@ -0,0 +1,92 @@
package implinframonitoring
import (
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
const (
namespaceNameAttrKey = "k8s.namespace.name"
)
var namespaceNameGroupByKey = qbtypes.GroupByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: namespaceNameAttrKey,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
}
// namespacesTableMetricNamesList drives the existence/retention check.
// Includes k8s.pod.phase so the response short-circuits cleanly when a
// cluster doesn't ship the metric — even though phase isn't part of the
// QB composite query (it's queried separately via getPerGroupPodPhaseCounts).
var namespacesTableMetricNamesList = []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
"k8s.pod.phase",
}
var namespaceAttrKeysForMetadata = []string{
"k8s.namespace.name",
"k8s.cluster.name",
}
var orderByToNamespacesQueryNames = map[string][]string{
inframonitoringtypes.NamespacesOrderByCPU: {"A"},
inframonitoringtypes.NamespacesOrderByMemory: {"D"},
}
// newNamespacesTableListQuery builds the composite QB v5 request for the namespaces list.
// Pod phase counts are derived separately via getPerGroupPodPhaseCounts (works for both
// list and grouped_list modes), so no phase query is included here.
// Query letters A and D are kept aligned with the v1 implementation.
func (m *module) newNamespacesTableListQuery() *qbtypes.QueryRangeRequest {
queries := []qbtypes.QueryEnvelope{
// Query A: CPU usage — sum of pod CPU within the group.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "A",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.pod.cpu.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{namespaceNameGroupByKey},
Disabled: false,
},
},
// Query D: Memory working set — sum of pod memory within the group.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "D",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.pod.memory.working_set",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{namespaceNameGroupByKey},
Disabled: false,
},
},
}
return &qbtypes.QueryRangeRequest{
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: queries,
},
}
}

View File

@@ -0,0 +1,299 @@
package implinframonitoring
import (
"context"
"fmt"
"slices"
"strings"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/huandu/go-sqlbuilder"
)
// buildNodeRecords assembles the page records. Condition counts come from
// conditionCounts in both modes. In list mode (isNodeNameInGroupBy=true) each
// group is one node, so exactly one count is 1; Condition is derived from
// which one. In grouped_list mode Condition stays NodeConditionNone.
func buildNodeRecords(
isNodeNameInGroupBy bool,
resp *qbtypes.QueryRangeResponse,
pageGroups []map[string]string,
groupBy []qbtypes.GroupByKey,
metadataMap map[string]map[string]string,
conditionCounts map[string]nodeConditionCounts,
) []inframonitoringtypes.NodeRecord {
metricsMap := parseFullQueryResponse(resp, groupBy)
records := make([]inframonitoringtypes.NodeRecord, 0, len(pageGroups))
for _, labels := range pageGroups {
compositeKey := compositeKeyFromLabels(labels, groupBy)
nodeName := labels[nodeNameAttrKey]
record := inframonitoringtypes.NodeRecord{ // initialize with default values
NodeName: nodeName,
Condition: inframonitoringtypes.NodeConditionNone,
NodeCPU: -1,
NodeCPUAllocatable: -1,
NodeMemory: -1,
NodeMemoryAllocatable: -1,
Meta: map[string]any{},
}
if metrics, ok := metricsMap[compositeKey]; ok {
if v, exists := metrics["A"]; exists {
record.NodeCPU = v
}
if v, exists := metrics["B"]; exists {
record.NodeCPUAllocatable = v
}
if v, exists := metrics["C"]; exists {
record.NodeMemory = v
}
if v, exists := metrics["D"]; exists {
record.NodeMemoryAllocatable = v
}
}
if conditionCountsForGroup, ok := conditionCounts[compositeKey]; ok {
record.ReadyNodesCount = conditionCountsForGroup.Ready
record.NotReadyNodesCount = conditionCountsForGroup.NotReady
// In list mode each group is one node; the count==1 bucket identifies the condition.
if isNodeNameInGroupBy {
switch {
case conditionCountsForGroup.Ready == 1:
record.Condition = inframonitoringtypes.NodeConditionReady
case conditionCountsForGroup.NotReady == 1:
record.Condition = inframonitoringtypes.NodeConditionNotReady
}
}
}
if attrs, ok := metadataMap[compositeKey]; ok {
for k, v := range attrs {
record.Meta[k] = v
}
}
records = append(records, record)
}
return records
}
func (m *module) getTopNodeGroups(
ctx context.Context,
orgID valuer.UUID,
req *inframonitoringtypes.PostableNodes,
metadataMap map[string]map[string]string,
) ([]map[string]string, error) {
orderByKey := req.OrderBy.Key.Name
queryNamesForOrderBy := orderByToNodesQueryNames[orderByKey]
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
topReq := &qbtypes.QueryRangeRequest{
Start: uint64(req.Start),
End: uint64(req.End),
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
},
}
for _, envelope := range m.newNodesTableListQuery().CompositeQuery.Queries {
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
continue
}
copied := envelope
if copied.Type == qbtypes.QueryTypeBuilder {
existingExpr := ""
if f := copied.GetFilter(); f != nil {
existingExpr = f.Expression
}
reqFilterExpr := ""
if req.Filter != nil {
reqFilterExpr = req.Filter.Expression
}
merged := mergeFilterExpressions(existingExpr, reqFilterExpr)
copied.SetFilter(&qbtypes.Filter{Expression: merged})
copied.SetGroupBy(req.GroupBy)
}
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
}
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
if err != nil {
return nil, err
}
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
}
func (m *module) getNodesTableMetadata(ctx context.Context, req *inframonitoringtypes.PostableNodes) (map[string]map[string]string, error) {
var nonGroupByAttrs []string
for _, key := range nodeAttrKeysForMetadata {
if !isKeyInGroupByAttrs(req.GroupBy, key) {
nonGroupByAttrs = append(nonGroupByAttrs, key)
}
}
return m.getMetadata(ctx, nodesTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
}
// getPerGroupNodeConditionCounts computes per-group node counts bucketed by each
// node's latest condition_ready value (0 / 1) in the requested window.
// Pipeline:
//
// timeSeriesFPs: fp ↔ (node_name, groupBy cols) from the time_series table.
// User filter + page-groups filter applied here.
// latestConditionPerNode: INNER JOIN samples × timeSeriesFPs, collapsed to
// the latest condition value per node via argMax(value, unix_milli).
// countNodesPerCondition: per-group uniqExactIf into ready/not_ready buckets.
//
// Groups absent from the result map have implicit zero counts (caller default).
func (m *module) getPerGroupNodeConditionCounts(
ctx context.Context,
req *inframonitoringtypes.PostableNodes,
pageGroups []map[string]string,
) (map[string]nodeConditionCounts, error) {
if len(pageGroups) == 0 || len(req.GroupBy) == 0 {
return map[string]nodeConditionCounts{}, nil
}
// Merged filter expression (user filter + page-groups IN clauses).
reqFilterExpr := ""
if req.Filter != nil {
reqFilterExpr = req.Filter.Expression
}
pageGroupsFilterExpr := buildPageGroupsFilterExpr(pageGroups)
filterExpr := mergeFilterExpressions(reqFilterExpr, pageGroupsFilterExpr)
// Resolve tables. Same convention as pods.
adjustedStart, adjustedEnd, _, localTimeSeriesTable := telemetrymetrics.WhichTSTableToUse(
uint64(req.Start), uint64(req.End), nil,
)
samplesTable := telemetrymetrics.WhichSamplesTableToUse(
uint64(req.Start), uint64(req.End),
metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil,
)
valueCol := telemetrymetrics.ValueColumnForSamplesTable(samplesTable)
// ----- timeSeriesFPs -----
timeSeriesFPs := sqlbuilder.NewSelectBuilder()
timeSeriesFPsSelectCols := []string{
"fingerprint",
fmt.Sprintf("JSONExtractString(labels, %s) AS node_name", timeSeriesFPs.Var(nodeNameAttrKey)),
}
for _, key := range req.GroupBy {
timeSeriesFPsSelectCols = append(timeSeriesFPsSelectCols,
fmt.Sprintf("JSONExtractString(labels, %s) AS %s", timeSeriesFPs.Var(key.Name), quoteIdentifier(key.Name)),
)
}
timeSeriesFPs.Select(timeSeriesFPsSelectCols...)
timeSeriesFPs.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, localTimeSeriesTable))
timeSeriesFPs.Where(
timeSeriesFPs.E("metric_name", nodeConditionMetricName),
timeSeriesFPs.GE("unix_milli", adjustedStart),
timeSeriesFPs.L("unix_milli", adjustedEnd),
)
if filterExpr != "" {
filterClause, err := m.buildFilterClause(ctx, &qbtypes.Filter{Expression: filterExpr}, req.Start, req.End)
if err != nil {
return nil, err
}
if filterClause != nil {
timeSeriesFPs.AddWhereClause(filterClause)
}
}
timeSeriesFPsGroupBy := []string{"fingerprint", "node_name"}
for _, key := range req.GroupBy {
timeSeriesFPsGroupBy = append(timeSeriesFPsGroupBy, quoteIdentifier(key.Name))
}
timeSeriesFPs.GroupBy(timeSeriesFPsGroupBy...)
timeSeriesFPsSQL, timeSeriesFPsArgs := timeSeriesFPs.BuildWithFlavor(sqlbuilder.ClickHouse)
// ----- latestConditionPerNode -----
latestConditionPerNode := sqlbuilder.NewSelectBuilder()
latestConditionPerNodeSelectCols := []string{"tsfp.node_name AS node_name"}
latestConditionPerNodeGroupBy := []string{"node_name"}
for _, key := range req.GroupBy {
col := quoteIdentifier(key.Name)
latestConditionPerNodeSelectCols = append(latestConditionPerNodeSelectCols, fmt.Sprintf("tsfp.%s AS %s", col, col))
latestConditionPerNodeGroupBy = append(latestConditionPerNodeGroupBy, col)
}
latestConditionPerNodeSelectCols = append(latestConditionPerNodeSelectCols,
fmt.Sprintf("argMax(samples.%s, samples.unix_milli) AS condition_value", valueCol),
)
latestConditionPerNode.Select(latestConditionPerNodeSelectCols...)
latestConditionPerNode.From(fmt.Sprintf(
"%s.%s AS samples INNER JOIN time_series_fps AS tsfp ON samples.fingerprint = tsfp.fingerprint",
telemetrymetrics.DBName, samplesTable,
))
latestConditionPerNode.Where(
latestConditionPerNode.E("samples.metric_name", nodeConditionMetricName),
latestConditionPerNode.GE("samples.unix_milli", req.Start),
latestConditionPerNode.L("samples.unix_milli", req.End),
"tsfp.node_name != ''",
)
latestConditionPerNode.GroupBy(latestConditionPerNodeGroupBy...)
latestConditionPerNodeSQL, latestConditionPerNodeArgs := latestConditionPerNode.BuildWithFlavor(sqlbuilder.ClickHouse)
// ----- countNodesPerCondition (outer SELECT) -----
countNodesPerConditionSelectCols := make([]string, 0, len(req.GroupBy)+2)
countNodesPerConditionGroupBy := make([]string, 0, len(req.GroupBy))
for _, key := range req.GroupBy {
col := quoteIdentifier(key.Name)
countNodesPerConditionSelectCols = append(countNodesPerConditionSelectCols, col)
countNodesPerConditionGroupBy = append(countNodesPerConditionGroupBy, col)
}
countNodesPerConditionSelectCols = append(countNodesPerConditionSelectCols,
fmt.Sprintf("uniqExactIf(node_name, condition_value = %d) AS ready_count", inframonitoringtypes.NodeConditionNumReady),
fmt.Sprintf("uniqExactIf(node_name, condition_value = %d) AS not_ready_count", inframonitoringtypes.NodeConditionNumNotReady),
)
countNodesPerConditionSQL := fmt.Sprintf(
"SELECT %s FROM latest_condition_per_node GROUP BY %s",
strings.Join(countNodesPerConditionSelectCols, ", "),
strings.Join(countNodesPerConditionGroupBy, ", "),
)
// Combine CTEs + outer.
cteFragments := []string{
fmt.Sprintf("time_series_fps AS (%s)", timeSeriesFPsSQL),
fmt.Sprintf("latest_condition_per_node AS (%s)", latestConditionPerNodeSQL),
}
finalSQL := querybuilder.CombineCTEs(cteFragments) + countNodesPerConditionSQL
finalArgs := querybuilder.PrependArgs([][]any{timeSeriesFPsArgs, latestConditionPerNodeArgs}, nil)
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, finalSQL, finalArgs...)
if err != nil {
return nil, err
}
defer rows.Close()
result := make(map[string]nodeConditionCounts)
for rows.Next() {
groupVals := make([]string, len(req.GroupBy))
scanPtrs := make([]any, 0, len(req.GroupBy)+2)
for i := range groupVals {
scanPtrs = append(scanPtrs, &groupVals[i])
}
var ready, notReady uint64
scanPtrs = append(scanPtrs, &ready, &notReady)
if err := rows.Scan(scanPtrs...); err != nil {
return nil, err
}
result[compositeKeyFromList(groupVals)] = nodeConditionCounts{
Ready: int(ready),
NotReady: int(notReady),
}
}
if err := rows.Err(); err != nil {
return nil, err
}
return result, nil
}

View File

@@ -0,0 +1,134 @@
package implinframonitoring
import (
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
const (
nodeNameAttrKey = "k8s.node.name"
nodeConditionMetricName = "k8s.node.condition_ready"
)
var nodeNameGroupByKey = qbtypes.GroupByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: nodeNameAttrKey,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
}
// nodesTableMetricNamesList drives the existence/retention check.
// Includes condition_ready so the response short-circuits cleanly when a
// cluster doesn't ship the metric — even though condition_ready isn't part
// of the QB composite query (it's queried separately via getPerGroupNodeConditionCounts).
var nodesTableMetricNamesList = []string{
"k8s.node.cpu.usage",
"k8s.node.allocatable_cpu",
"k8s.node.memory.working_set",
"k8s.node.allocatable_memory",
"k8s.node.condition_ready",
}
var nodeAttrKeysForMetadata = []string{
"k8s.node.uid",
"k8s.cluster.name",
}
var orderByToNodesQueryNames = map[string][]string{
inframonitoringtypes.NodesOrderByCPU: {"A"},
inframonitoringtypes.NodesOrderByCPUAllocatable: {"B"},
inframonitoringtypes.NodesOrderByMemory: {"C"},
inframonitoringtypes.NodesOrderByMemoryAllocatable: {"D"},
}
// newNodesTableListQuery builds the composite QB v5 request for the nodes list.
// Node condition is derived separately via getPerGroupNodeConditionCounts (works
// for both list and grouped_list modes), so no condition query is included here.
func (m *module) newNodesTableListQuery() *qbtypes.QueryRangeRequest {
queries := []qbtypes.QueryEnvelope{
// Query A: CPU usage
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "A",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.cpu.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
Disabled: false,
},
},
// Query B: CPU allocatable.
// TimeAggregationLatest is the closest v5 equivalent of v1's AnyLast;
// allocatable values change rarely so divergence in practice is negligible.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "B",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.allocatable_cpu",
TimeAggregation: metrictypes.TimeAggregationLatest,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
Disabled: false,
},
},
// Query C: Memory working set
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "C",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.memory.working_set",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
Disabled: false,
},
},
// Query D: Memory allocatable. Same Latest caveat as Query B.
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "D",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "k8s.node.allocatable_memory",
TimeAggregation: metrictypes.TimeAggregationLatest,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
Disabled: false,
},
},
}
return &qbtypes.QueryRangeRequest{
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: queries,
},
}
}

View File

@@ -11,9 +11,13 @@ import (
type Handler interface {
ListHosts(http.ResponseWriter, *http.Request)
ListPods(http.ResponseWriter, *http.Request)
ListNodes(http.ResponseWriter, *http.Request)
ListNamespaces(http.ResponseWriter, *http.Request)
}
type Module interface {
ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error)
ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error)
ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error)
ListNamespaces(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNamespaces) (*inframonitoringtypes.Namespaces, error)
}

View File

@@ -32,28 +32,30 @@ import (
)
type PrepareTaskOptions struct {
Rule *ruletypes.PostableRule
TaskName string
RuleStore ruletypes.RuleStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
NotifyFunc NotifyFunc
SQLStore sqlstore.SQLStore
OrgID valuer.UUID
Rule *ruletypes.PostableRule
TaskName string
RuleStore ruletypes.RuleStore
MaintenanceStore ruletypes.MaintenanceStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
NotifyFunc NotifyFunc
SQLStore sqlstore.SQLStore
OrgID valuer.UUID
}
type PrepareTestRuleOptions struct {
Rule *ruletypes.PostableRule
RuleStore ruletypes.RuleStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
NotifyFunc NotifyFunc
SQLStore sqlstore.SQLStore
OrgID valuer.UUID
Rule *ruletypes.PostableRule
RuleStore ruletypes.RuleStore
MaintenanceStore ruletypes.MaintenanceStore
Querier querier.Querier
Logger *slog.Logger
Cache cache.Cache
ManagerOpts *ManagerOptions
NotifyFunc NotifyFunc
SQLStore sqlstore.SQLStore
OrgID valuer.UUID
}
const taskNameSuffix = "webAppEditor"
@@ -132,6 +134,7 @@ func defaultOptions(o *ManagerOptions) *ManagerOptions {
}
func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
rules := make([]Rule, 0)
var task Task
@@ -156,6 +159,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
WithMetadataStore(opts.ManagerOpts.MetadataStore),
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
)
if err != nil {
return task, err
}
@@ -163,7 +167,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
rules = append(rules, tr)
// create ch rule task for evaluation
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
@@ -179,6 +183,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
WithMetadataStore(opts.ManagerOpts.MetadataStore),
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
)
if err != nil {
return task, err
}
@@ -186,7 +191,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
rules = append(rules, pr)
// create promql rule task for evaluation
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
} else {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
@@ -236,7 +241,6 @@ func (m *Manager) MaintenanceStore() ruletypes.MaintenanceStore {
return m.maintenanceStore
}
// TODO: remove (unused)?
func (m *Manager) Pause(b bool) {
m.mtx.Lock()
defer m.mtx.Unlock()
@@ -426,17 +430,19 @@ func (m *Manager) editTask(_ context.Context, orgID valuer.UUID, rule *ruletypes
m.logger.Debug("editing a rule task", "name", taskName)
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.notifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.notifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
})
if err != nil {
m.logger.Error("loading tasks failed", errors.Attr(err))
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error preparing rule with given parameters, previous rule set restored")
@@ -637,17 +643,19 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
m.logger.Debug("adding a new rule task", "name", taskName)
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.notifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
Rule: rule,
TaskName: taskName,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.notifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
})
if err != nil {
m.logger.Error("creating rule task failed", "name", taskName, errors.Attr(err))
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error loading rules, previous rule set restored")
@@ -695,6 +703,7 @@ func (m *Manager) RuleTasks() []Task {
// RuleTasksWithoutLock returns the list of manager's rule tasks without
// acquiring a lock on the manager.
func (m *Manager) RuleTasksWithoutLock() []Task {
rgs := make([]Task, 0, len(m.tasks))
for _, g := range m.tasks {
rgs = append(rgs, g)
@@ -888,6 +897,7 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta
// the task state. For example - if a stored rule is disabled, then
// there is no task running against it.
func (m *Manager) syncRuleStateWithTask(ctx context.Context, orgID valuer.UUID, taskName string, rule *ruletypes.PostableRule) error {
if rule.Disabled {
// check if rule has any task running
if _, ok := m.tasks[taskName]; ok {
@@ -1019,15 +1029,16 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
}
alertCount, err := m.prepareTestRuleFunc(PrepareTestRuleOptions{
Rule: &parsedRule,
RuleStore: m.ruleStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.testNotifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
Rule: &parsedRule,
RuleStore: m.ruleStore,
MaintenanceStore: m.maintenanceStore,
Querier: m.opts.Querier,
Logger: m.opts.Logger,
Cache: m.cache,
ManagerOpts: m.opts,
NotifyFunc: m.testNotifyFunc,
SQLStore: m.sqlstore,
OrgID: orgID,
})
return alertCount, err

View File

@@ -15,6 +15,7 @@ import (
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// PromRuleTask is a promql rule executor
@@ -38,11 +39,14 @@ type PromRuleTask struct {
pause bool
logger *slog.Logger
notify NotifyFunc
maintenanceStore ruletypes.MaintenanceStore
orgID valuer.UUID
}
// NewPromRuleTask holds rules that have promql condition
// and evaluates the rule at a given frequency
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *PromRuleTask {
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *PromRuleTask {
opts.Logger.Info("initiating a new rule group", "name", name, "frequency", frequency)
if frequency == 0 {
@@ -59,8 +63,10 @@ func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, o
seriesInPreviousEval: make([]map[string]plabels.Labels, len(rules)),
done: make(chan struct{}),
terminated: make(chan struct{}),
notify: notify,
logger: opts.Logger,
notify: notify,
maintenanceStore: maintenanceStore,
logger: opts.Logger,
orgID: orgID,
}
}
@@ -324,12 +330,30 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
}()
g.logger.InfoContext(ctx, "promql rule task", "name", g.name, "eval_started_at", ts)
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
if err != nil {
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
}
for i, rule := range g.rules {
if rule == nil {
continue
}
shouldSkip := false
for _, m := range maintenance {
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
if m.ShouldSkip(rule.ID(), ts) {
shouldSkip = true
break
}
}
if shouldSkip {
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
continue
}
select {
case <-g.done:
return

View File

@@ -2,18 +2,20 @@ package rules
import (
"context"
"log/slog"
"runtime/debug"
"sort"
"sync"
"time"
"log/slog"
opentracing "github.com/opentracing/opentracing-go"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
// RuleTask holds a rule (with composite queries)
@@ -35,28 +37,34 @@ type RuleTask struct {
pause bool
notify NotifyFunc
maintenanceStore ruletypes.MaintenanceStore
orgID valuer.UUID
}
const DefaultFrequency = 1 * time.Minute
// NewRuleTask makes a new RuleTask with the given name, options, and rules.
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *RuleTask {
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *RuleTask {
if frequency == 0 {
frequency = DefaultFrequency
}
opts.Logger.Info("initiating a new rule task", "name", name, "frequency", frequency)
return &RuleTask{
name: name,
file: file,
pause: false,
frequency: frequency,
rules: rules,
opts: opts,
logger: opts.Logger,
done: make(chan struct{}),
terminated: make(chan struct{}),
notify: notify,
name: name,
file: file,
pause: false,
frequency: frequency,
rules: rules,
opts: opts,
logger: opts.Logger,
done: make(chan struct{}),
terminated: make(chan struct{}),
notify: notify,
maintenanceStore: maintenanceStore,
orgID: orgID,
}
}
@@ -64,7 +72,6 @@ func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts
func (g *RuleTask) Name() string { return g.name }
// Key returns the group key
// TODO: remove (unused)?
func (g *RuleTask) Key() string {
return g.name + ";" + g.file
}
@@ -254,6 +261,7 @@ func nameAndLabels(rule Rule) string {
// Rules are matched based on their name and labels. If there are duplicates, the
// first is matched with the first, second with the second etc.
func (g *RuleTask) CopyState(fromTask Task) error {
from, ok := fromTask.(*RuleTask)
if !ok {
return errors.NewInternalf(errors.CodeInternal, "invalid from task for copy")
@@ -298,6 +306,7 @@ func (g *RuleTask) CopyState(fromTask Task) error {
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
defer func() {
if r := recover(); r != nil {
g.logger.ErrorContext(
@@ -309,11 +318,31 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
g.logger.DebugContext(ctx, "rule task eval started", "name", g.name, "start_time", ts)
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
if err != nil {
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
}
for i, rule := range g.rules {
if rule == nil {
continue
}
shouldSkip := false
for _, m := range maintenance {
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
if m.ShouldSkip(rule.ID(), ts) {
shouldSkip = true
break
}
}
if shouldSkip {
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
continue
}
select {
case <-g.done:
return
@@ -353,6 +382,7 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
}
rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify)
}(i, rule)
}
}

View File

@@ -3,6 +3,9 @@ package rules
import (
"context"
"time"
"github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
type TaskType string
@@ -29,9 +32,9 @@ type Task interface {
// newTask returns an appropriate group for
// rule type
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) Task {
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) Task {
if taskType == TaskTypeCh {
return NewRuleTask(name, file, frequency, rules, opts, notify)
return NewRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
}
return NewPromRuleTask(name, file, frequency, rules, opts, notify)
return NewPromRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
}

View File

@@ -19,7 +19,6 @@ import (
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
"github.com/SigNoz/signoz/pkg/sqlstore"
@@ -39,8 +38,7 @@ func TestNewHandlers(t *testing.T) {
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
notificationManager := nfmanagertest.NewMock()
require.NoError(t, err)
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
require.NoError(t, err)
tokenizer := tokenizertest.NewMockTokenizer(t)
emailing := emailingtest.New()

View File

@@ -20,7 +20,6 @@ import (
"github.com/SigNoz/signoz/pkg/modules/serviceaccount/implserviceaccount"
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
"github.com/SigNoz/signoz/pkg/sqlstore"
@@ -40,8 +39,7 @@ func TestNewModules(t *testing.T) {
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
notificationManager := nfmanagertest.NewMock()
require.NoError(t, err)
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
require.NoError(t, err)
tokenizer := tokenizertest.NewMockTokenizer(t)
emailing := emailingtest.New()

View File

@@ -65,7 +65,6 @@ import (
"github.com/SigNoz/signoz/pkg/tokenizer/tokenizerstore/sqltokenizerstore"
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
"github.com/SigNoz/signoz/pkg/types/featuretypes"
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
"github.com/SigNoz/signoz/pkg/version"
"github.com/SigNoz/signoz/pkg/web"
"github.com/SigNoz/signoz/pkg/web/noopweb"
@@ -222,14 +221,9 @@ func NewNotificationManagerProviderFactories(routeStore alertmanagertypes.RouteS
)
}
func NewAlertmanagerProviderFactories(
sqlstore sqlstore.SQLStore,
orgGetter organization.Getter,
nfManager nfmanager.NotificationManager,
maintenanceStore ruletypes.MaintenanceStore,
) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
func NewAlertmanagerProviderFactories(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, nfManager nfmanager.NotificationManager) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
return factory.MustNewNamedMap(
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager, maintenanceStore),
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager),
)
}

View File

@@ -14,7 +14,6 @@ import (
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
"github.com/SigNoz/signoz/pkg/sqlschema"
"github.com/SigNoz/signoz/pkg/sqlschema/sqlschematest"
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sqlstore"
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
"github.com/SigNoz/signoz/pkg/statsreporter"
@@ -60,11 +59,9 @@ func TestNewProviderFactories(t *testing.T) {
})
assert.NotPanics(t, func() {
store := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)
orgGetter := implorganization.NewGetter(implorganization.NewStore(store), nil)
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)), nil)
notificationManager := nfmanagertest.NewMock()
maintenanceStore := sqlrulestore.NewMaintenanceStore(store)
NewAlertmanagerProviderFactories(store, orgGetter, notificationManager, maintenanceStore)
NewAlertmanagerProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual), orgGetter, notificationManager)
})
assert.NotPanics(t, func() {

View File

@@ -34,7 +34,6 @@ import (
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/queryparser"
"github.com/SigNoz/signoz/pkg/ruler"
sqlrulestore "github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
"github.com/SigNoz/signoz/pkg/sharder"
"github.com/SigNoz/signoz/pkg/sqlmigration"
"github.com/SigNoz/signoz/pkg/sqlmigrator"
@@ -363,14 +362,12 @@ func New(
return nil, err
}
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
// Initialize alertmanager from the available alertmanager provider factories
alertmanager, err := factory.NewProviderFromNamedMap(
ctx,
providerSettings,
config.Alertmanager,
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager, maintenanceStore),
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager),
config.Alertmanager.Provider,
)
if err != nil {

View File

@@ -0,0 +1,103 @@
package inframonitoringtypes
import (
"encoding/json"
"slices"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
type Namespaces struct {
Type ResponseType `json:"type" required:"true"`
Records []NamespaceRecord `json:"records" required:"true"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}
type NamespaceRecord struct {
NamespaceName string `json:"namespaceName" required:"true"`
NamespaceCPU float64 `json:"namespaceCPU" required:"true"`
NamespaceMemory float64 `json:"namespaceMemory" required:"true"`
PendingPodCount int `json:"pendingPodCount" required:"true"`
RunningPodCount int `json:"runningPodCount" required:"true"`
SucceededPodCount int `json:"succeededPodCount" required:"true"`
FailedPodCount int `json:"failedPodCount" required:"true"`
UnknownPodCount int `json:"unknownPodCount" required:"true"`
Meta map[string]interface{} `json:"meta" required:"true"`
}
// PostableNamespaces is the request body for the v2 namespaces list API.
type PostableNamespaces struct {
Start int64 `json:"start" required:"true"`
End int64 `json:"end" required:"true"`
Filter *qbtypes.Filter `json:"filter"`
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
OrderBy *qbtypes.OrderBy `json:"orderBy"`
Offset int `json:"offset"`
Limit int `json:"limit" required:"true"`
}
// Validate ensures PostableNamespaces contains acceptable values.
func (req *PostableNamespaces) Validate() error {
if req == nil {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
if req.Start <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid start time %d: start must be greater than 0",
req.Start,
)
}
if req.End <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid end time %d: end must be greater than 0",
req.End,
)
}
if req.Start >= req.End {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid time range: start (%d) must be less than end (%d)",
req.Start,
req.End,
)
}
if req.Limit < 1 || req.Limit > 5000 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
}
if req.Offset < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
}
if req.OrderBy != nil {
if !slices.Contains(NamespacesValidOrderByKeys, req.OrderBy.Key.Name) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", req.OrderBy.Key.Name)
}
if req.OrderBy.Direction != qbtypes.OrderDirectionAsc && req.OrderBy.Direction != qbtypes.OrderDirectionDesc {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", req.OrderBy.Direction)
}
}
return nil
}
// UnmarshalJSON validates input immediately after decoding.
func (req *PostableNamespaces) UnmarshalJSON(data []byte) error {
type raw PostableNamespaces
var decoded raw
if err := json.Unmarshal(data, &decoded); err != nil {
return err
}
*req = PostableNamespaces(decoded)
return req.Validate()
}

View File

@@ -0,0 +1,11 @@
package inframonitoringtypes
const (
NamespacesOrderByCPU = "cpu"
NamespacesOrderByMemory = "memory"
)
var NamespacesValidOrderByKeys = []string{
NamespacesOrderByCPU,
NamespacesOrderByMemory,
}

View File

@@ -0,0 +1,237 @@
package inframonitoringtypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/require"
)
func TestPostableNamespaces_Validate(t *testing.T) {
tests := []struct {
name string
req *PostableNamespaces
wantErr bool
}{
{
name: "valid request",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "nil request",
req: nil,
wantErr: true,
},
{
name: "start time zero",
req: &PostableNamespaces{
Start: 0,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time negative",
req: &PostableNamespaces{
Start: -1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "end time zero",
req: &PostableNamespaces{
Start: 1000,
End: 0,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time greater than end time",
req: &PostableNamespaces{
Start: 2000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time equal to end time",
req: &PostableNamespaces{
Start: 1000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "limit zero",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 0,
Offset: 0,
},
wantErr: true,
},
{
name: "limit negative",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: -10,
Offset: 0,
},
wantErr: true,
},
{
name: "limit exceeds max",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 5001,
Offset: 0,
},
wantErr: true,
},
{
name: "offset negative",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: -5,
},
wantErr: true,
},
{
name: "orderBy nil is valid",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "orderBy with valid key cpu and direction asc",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NamespacesOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionAsc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key memory and direction desc",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NamespacesOrderByMemory,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: false,
},
{
name: "orderBy with pod_phase key is rejected",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "pod_phase",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with invalid key",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "unknown",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with valid key but invalid direction",
req: &PostableNamespaces{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NamespacesOrderByMemory,
},
},
Direction: qbtypes.OrderDirection{String: valuer.NewString("invalid")},
},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if tt.wantErr {
require.Error(t, err)
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
} else {
require.NoError(t, err)
}
})
}
}

View File

@@ -0,0 +1,103 @@
package inframonitoringtypes
import (
"encoding/json"
"slices"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
type Nodes struct {
Type ResponseType `json:"type" required:"true"`
Records []NodeRecord `json:"records" required:"true"`
Total int `json:"total" required:"true"`
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
}
type NodeRecord struct {
NodeName string `json:"nodeName" required:"true"`
Condition NodeCondition `json:"condition" required:"true"`
ReadyNodesCount int `json:"readyNodesCount" required:"true"`
NotReadyNodesCount int `json:"notReadyNodesCount" required:"true"`
NodeCPU float64 `json:"nodeCPU" required:"true"`
NodeCPUAllocatable float64 `json:"nodeCPUAllocatable" required:"true"`
NodeMemory float64 `json:"nodeMemory" required:"true"`
NodeMemoryAllocatable float64 `json:"nodeMemoryAllocatable" required:"true"`
Meta map[string]interface{} `json:"meta" required:"true"`
}
// PostableNodes is the request body for the v2 nodes list API.
type PostableNodes struct {
Start int64 `json:"start" required:"true"`
End int64 `json:"end" required:"true"`
Filter *qbtypes.Filter `json:"filter"`
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
OrderBy *qbtypes.OrderBy `json:"orderBy"`
Offset int `json:"offset"`
Limit int `json:"limit" required:"true"`
}
// Validate ensures PostableNodes contains acceptable values.
func (req *PostableNodes) Validate() error {
if req == nil {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
if req.Start <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid start time %d: start must be greater than 0",
req.Start,
)
}
if req.End <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid end time %d: end must be greater than 0",
req.End,
)
}
if req.Start >= req.End {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid time range: start (%d) must be less than end (%d)",
req.Start,
req.End,
)
}
if req.Limit < 1 || req.Limit > 5000 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
}
if req.Offset < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
}
if req.OrderBy != nil {
if !slices.Contains(NodesValidOrderByKeys, req.OrderBy.Key.Name) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", req.OrderBy.Key.Name)
}
if req.OrderBy.Direction != qbtypes.OrderDirectionAsc && req.OrderBy.Direction != qbtypes.OrderDirectionDesc {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", req.OrderBy.Direction)
}
}
return nil
}
// UnmarshalJSON validates input immediately after decoding.
func (req *PostableNodes) UnmarshalJSON(data []byte) error {
type raw PostableNodes
var decoded raw
if err := json.Unmarshal(data, &decoded); err != nil {
return err
}
*req = PostableNodes(decoded)
return req.Validate()
}

View File

@@ -0,0 +1,42 @@
package inframonitoringtypes
import "github.com/SigNoz/signoz/pkg/valuer"
type NodeCondition struct {
valuer.String
}
var (
NodeConditionReady = NodeCondition{valuer.NewString("ready")}
NodeConditionNotReady = NodeCondition{valuer.NewString("not_ready")}
NodeConditionNone = NodeCondition{valuer.NewString("")}
)
func (NodeCondition) Enum() []any {
return []any{
NodeConditionReady,
NodeConditionNotReady,
NodeConditionNone,
}
}
// Numeric values emitted by the k8s.node.condition_ready metric
// (source: OTel kubeletstats receiver).
const (
NodeConditionNumReady = 1
NodeConditionNumNotReady = 0
)
const (
NodesOrderByCPU = "cpu"
NodesOrderByCPUAllocatable = "cpu_allocatable"
NodesOrderByMemory = "memory"
NodesOrderByMemoryAllocatable = "memory_allocatable"
)
var NodesValidOrderByKeys = []string{
NodesOrderByCPU,
NodesOrderByCPUAllocatable,
NodesOrderByMemory,
NodesOrderByMemoryAllocatable,
}

View File

@@ -0,0 +1,255 @@
package inframonitoringtypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/require"
)
func TestPostableNodes_Validate(t *testing.T) {
tests := []struct {
name string
req *PostableNodes
wantErr bool
}{
{
name: "valid request",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "nil request",
req: nil,
wantErr: true,
},
{
name: "start time zero",
req: &PostableNodes{
Start: 0,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time negative",
req: &PostableNodes{
Start: -1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "end time zero",
req: &PostableNodes{
Start: 1000,
End: 0,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time greater than end time",
req: &PostableNodes{
Start: 2000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time equal to end time",
req: &PostableNodes{
Start: 1000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "limit zero",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 0,
Offset: 0,
},
wantErr: true,
},
{
name: "limit negative",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: -10,
Offset: 0,
},
wantErr: true,
},
{
name: "limit exceeds max",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 5001,
Offset: 0,
},
wantErr: true,
},
{
name: "offset negative",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: -5,
},
wantErr: true,
},
{
name: "orderBy nil is valid",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "orderBy with valid key cpu and direction asc",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NodesOrderByCPU,
},
},
Direction: qbtypes.OrderDirectionAsc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key cpu_allocatable and direction desc",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NodesOrderByCPUAllocatable,
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: false,
},
{
name: "orderBy with valid key memory_allocatable and direction asc",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NodesOrderByMemoryAllocatable,
},
},
Direction: qbtypes.OrderDirectionAsc,
},
},
wantErr: false,
},
{
name: "orderBy with condition key is rejected",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "condition",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with invalid key",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "unknown",
},
},
Direction: qbtypes.OrderDirectionDesc,
},
},
wantErr: true,
},
{
name: "orderBy with valid key but invalid direction",
req: &PostableNodes{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
OrderBy: &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: NodesOrderByMemory,
},
},
Direction: qbtypes.OrderDirection{String: valuer.NewString("invalid")},
},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if tt.wantErr {
require.Error(t, err)
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
} else {
require.NoError(t, err)
}
})
}
}

View File

@@ -11,7 +11,9 @@ import (
"github.com/uptrace/bun"
)
var ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
var (
ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
)
type MaintenanceStatus struct {
valuer.String
@@ -61,15 +63,15 @@ type StorablePlannedMaintenance struct {
}
type PlannedMaintenance struct {
ID valuer.UUID `json:"id" required:"true"`
Name string `json:"name" required:"true"`
Description string `json:"description"`
Schedule *Schedule `json:"schedule" required:"true"`
RuleIDs []string `json:"alertIds"`
CreatedAt time.Time `json:"createdAt"`
CreatedBy string `json:"createdBy"`
UpdatedAt time.Time `json:"updatedAt"`
UpdatedBy string `json:"updatedBy"`
ID valuer.UUID `json:"id" required:"true"`
Name string `json:"name" required:"true"`
Description string `json:"description"`
Schedule *Schedule `json:"schedule" required:"true"`
RuleIDs []string `json:"alertIds"`
CreatedAt time.Time `json:"createdAt"`
CreatedBy string `json:"createdBy"`
UpdatedAt time.Time `json:"updatedAt"`
UpdatedBy string `json:"updatedBy"`
Status MaintenanceStatus `json:"status" required:"true"`
Kind MaintenanceKind `json:"kind" required:"true"`
}
@@ -159,11 +161,8 @@ func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
currentTime := now.In(loc)
// fixed schedule — only when no recurrence is configured.
// When recurrence is set, the recurring check below handles everything;
// falling through here would cause the window to match the absolute
// StartTimeEndTime range instead of the daily/weekly/monthly pattern.
if m.Schedule.Recurrence == nil && !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
// fixed schedule
if !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
startTime := m.Schedule.StartTime.In(loc)
endTime := m.Schedule.EndTime.In(loc)
if currentTime.Equal(startTime) || currentTime.Equal(endTime) ||

View File

@@ -13,6 +13,7 @@ func timePtr(t time.Time) *time.Time {
}
func TestShouldSkipMaintenance(t *testing.T) {
cases := []struct {
name string
maintenance *PlannedMaintenance
@@ -498,7 +499,7 @@ func TestShouldSkipMaintenance(t *testing.T) {
},
},
},
ts: time.Date(2024, 4, 1, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 04, 1, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -507,14 +508,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 4, 15, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 04, 15, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -523,14 +524,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 4, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
ts: time.Date(2024, 04, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
skip: false,
},
{
@@ -539,14 +540,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 4, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
ts: time.Date(2024, 04, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
skip: false,
},
{
@@ -555,14 +556,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 5, 6, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 05, 06, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -571,14 +572,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeWeekly,
RepeatOn: []RepeatOn{RepeatOnMonday},
},
},
},
ts: time.Date(2024, 5, 6, 14, 0, 0, 0, time.UTC),
ts: time.Date(2024, 05, 06, 14, 00, 0, 0, time.UTC),
skip: true,
},
{
@@ -587,13 +588,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 4, 4, 12, 10, 0, 0, time.UTC),
ts: time.Date(2024, 04, 04, 12, 10, 0, 0, time.UTC),
skip: true,
},
{
@@ -602,13 +603,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 4, 4, 14, 10, 0, 0, time.UTC),
ts: time.Date(2024, 04, 04, 14, 10, 0, 0, time.UTC),
skip: false,
},
{
@@ -617,52 +618,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
Schedule: &Schedule{
Timezone: "UTC",
Recurrence: &Recurrence{
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeMonthly,
},
},
},
ts: time.Date(2024, 5, 4, 12, 10, 0, 0, time.UTC),
skip: true,
},
// The recurrence should govern, when set. Not the fixed range.
{
name: "recurring-daily-with-fixed-times-outside-daily-window",
maintenance: &PlannedMaintenance{
Schedule: &Schedule{
Timezone: "UTC",
// These fixed fields should be ignored when Recurrence is set.
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
Recurrence: &Recurrence{
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC), // daily at 14:00
Duration: valuer.MustParseTextDuration("2h"), // until 16:00
RepeatType: RepeatTypeDaily,
},
},
},
// 11:00 is inside the fixed range but outside the daily 14:00-16:00 window.
// Before the fix this returned true (bug); after fix it returns false.
ts: time.Date(2026, 4, 15, 11, 0, 0, 0, time.UTC),
skip: false,
},
{
name: "recurring-daily-with-fixed-times-inside-daily-window",
maintenance: &PlannedMaintenance{
Schedule: &Schedule{
Timezone: "UTC",
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
Recurrence: &Recurrence{
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC),
Duration: valuer.MustParseTextDuration("2h"),
RepeatType: RepeatTypeDaily,
},
},
},
// 15:00 is inside the daily 14:00-16:00 window — should skip.
ts: time.Date(2026, 4, 15, 15, 0, 0, 0, time.UTC),
ts: time.Date(2024, 05, 04, 12, 10, 0, 0, time.UTC),
skip: true,
},
}