mirror of
https://github.com/SigNoz/signoz.git
synced 2026-04-28 14:40:32 +01:00
Compare commits
11 Commits
nv/v2-dash
...
alertmanag
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
87bf9257b4 | ||
|
|
2d7e91c669 | ||
|
|
12d487b91c | ||
|
|
c7bee0b6d3 | ||
|
|
545de41c10 | ||
|
|
5c2a355030 | ||
|
|
fa9ad41a1b | ||
|
|
a8790c6fe9 | ||
|
|
d0e748ed37 | ||
|
|
ffd493617f | ||
|
|
3f95d35ad5 |
56
.github/CODEOWNERS
vendored
56
.github/CODEOWNERS
vendored
@@ -52,49 +52,49 @@ go.mod @therealpandey
|
||||
|
||||
# Querier Owners
|
||||
|
||||
/pkg/querier/ @srikanthccv
|
||||
/pkg/variables/ @srikanthccv
|
||||
/pkg/types/querybuildertypes/ @srikanthccv
|
||||
/pkg/types/telemetrytypes/ @srikanthccv
|
||||
/pkg/querybuilder/ @srikanthccv
|
||||
/pkg/telemetrylogs/ @srikanthccv
|
||||
/pkg/telemetrymetadata/ @srikanthccv
|
||||
/pkg/telemetrymetrics/ @srikanthccv
|
||||
/pkg/telemetrytraces/ @srikanthccv
|
||||
/pkg/querier/ @srikanthccv @therealpandey
|
||||
/pkg/variables/ @srikanthccv @therealpandey
|
||||
/pkg/types/querybuildertypes/ @srikanthccv @therealpandey
|
||||
/pkg/types/telemetrytypes/ @srikanthccv @therealpandey
|
||||
/pkg/querybuilder/ @srikanthccv @therealpandey
|
||||
/pkg/telemetrylogs/ @srikanthccv @therealpandey
|
||||
/pkg/telemetrymetadata/ @srikanthccv @therealpandey
|
||||
/pkg/telemetrymetrics/ @srikanthccv @therealpandey
|
||||
/pkg/telemetrytraces/ @srikanthccv @therealpandey
|
||||
|
||||
# Metrics
|
||||
|
||||
/pkg/types/metrictypes/ @srikanthccv
|
||||
/pkg/types/metricsexplorertypes/ @srikanthccv
|
||||
/pkg/modules/metricsexplorer/ @srikanthccv
|
||||
/pkg/prometheus/ @srikanthccv
|
||||
/pkg/types/metrictypes/ @srikanthccv @therealpandey
|
||||
/pkg/types/metricsexplorertypes/ @srikanthccv @therealpandey
|
||||
/pkg/modules/metricsexplorer/ @srikanthccv @therealpandey
|
||||
/pkg/prometheus/ @srikanthccv @therealpandey
|
||||
|
||||
# APM
|
||||
|
||||
/pkg/types/servicetypes/ @srikanthccv
|
||||
/pkg/types/apdextypes/ @srikanthccv
|
||||
/pkg/modules/apdex/ @srikanthccv
|
||||
/pkg/modules/services/ @srikanthccv
|
||||
/pkg/types/servicetypes/ @srikanthccv @therealpandey
|
||||
/pkg/types/apdextypes/ @srikanthccv @therealpandey
|
||||
/pkg/modules/apdex/ @srikanthccv @therealpandey
|
||||
/pkg/modules/services/ @srikanthccv @therealpandey
|
||||
|
||||
# Dashboard
|
||||
|
||||
/pkg/types/dashboardtypes/ @srikanthccv
|
||||
/pkg/modules/dashboard/ @srikanthccv
|
||||
/pkg/types/dashboardtypes/ @srikanthccv @therealpandey
|
||||
/pkg/modules/dashboard/ @srikanthccv @therealpandey
|
||||
|
||||
# Rule/Alertmanager
|
||||
|
||||
/pkg/types/ruletypes/ @srikanthccv
|
||||
/pkg/types/alertmanagertypes @srikanthccv
|
||||
/pkg/alertmanager/ @srikanthccv
|
||||
/pkg/ruler/ @srikanthccv
|
||||
/pkg/modules/rulestatehistory/ @srikanthccv
|
||||
/pkg/types/rulestatehistorytypes/ @srikanthccv
|
||||
/pkg/types/ruletypes/ @srikanthccv @therealpandey
|
||||
/pkg/types/alertmanagertypes @srikanthccv @therealpandey
|
||||
/pkg/alertmanager/ @srikanthccv @therealpandey
|
||||
/pkg/ruler/ @srikanthccv @therealpandey
|
||||
/pkg/modules/rulestatehistory/ @srikanthccv @therealpandey
|
||||
/pkg/types/rulestatehistorytypes/ @srikanthccv @therealpandey
|
||||
|
||||
# Correlation-adjacent
|
||||
|
||||
/pkg/contextlinks/ @srikanthccv
|
||||
/pkg/types/parsertypes/ @srikanthccv
|
||||
/pkg/queryparser/ @srikanthccv
|
||||
/pkg/contextlinks/ @srikanthccv @therealpandey
|
||||
/pkg/types/parsertypes/ @srikanthccv @therealpandey
|
||||
/pkg/queryparser/ @srikanthccv @therealpandey
|
||||
|
||||
# AuthN / AuthZ Owners
|
||||
|
||||
|
||||
@@ -2474,6 +2474,97 @@ components:
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesPodPhase:
|
||||
enum:
|
||||
- pending
|
||||
- running
|
||||
- succeeded
|
||||
- failed
|
||||
- unknown
|
||||
- ""
|
||||
type: string
|
||||
InframonitoringtypesPodRecord:
|
||||
properties:
|
||||
failedPodCount:
|
||||
type: integer
|
||||
meta:
|
||||
additionalProperties: {}
|
||||
nullable: true
|
||||
type: object
|
||||
pendingPodCount:
|
||||
type: integer
|
||||
podAge:
|
||||
format: int64
|
||||
type: integer
|
||||
podCPU:
|
||||
format: double
|
||||
type: number
|
||||
podCPULimit:
|
||||
format: double
|
||||
type: number
|
||||
podCPURequest:
|
||||
format: double
|
||||
type: number
|
||||
podMemory:
|
||||
format: double
|
||||
type: number
|
||||
podMemoryLimit:
|
||||
format: double
|
||||
type: number
|
||||
podMemoryRequest:
|
||||
format: double
|
||||
type: number
|
||||
podPhase:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodPhase'
|
||||
podUID:
|
||||
type: string
|
||||
runningPodCount:
|
||||
type: integer
|
||||
succeededPodCount:
|
||||
type: integer
|
||||
unknownPodCount:
|
||||
type: integer
|
||||
required:
|
||||
- podUID
|
||||
- podCPU
|
||||
- podCPURequest
|
||||
- podCPULimit
|
||||
- podMemory
|
||||
- podMemoryRequest
|
||||
- podMemoryLimit
|
||||
- podPhase
|
||||
- pendingPodCount
|
||||
- runningPodCount
|
||||
- succeededPodCount
|
||||
- failedPodCount
|
||||
- unknownPodCount
|
||||
- podAge
|
||||
- meta
|
||||
type: object
|
||||
InframonitoringtypesPods:
|
||||
properties:
|
||||
endTimeBeforeRetention:
|
||||
type: boolean
|
||||
records:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodRecord'
|
||||
nullable: true
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
$ref: '#/components/schemas/InframonitoringtypesResponseType'
|
||||
warning:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5QueryWarnData'
|
||||
required:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesPostableHosts:
|
||||
properties:
|
||||
end:
|
||||
@@ -2500,6 +2591,32 @@ components:
|
||||
- end
|
||||
- limit
|
||||
type: object
|
||||
InframonitoringtypesPostablePods:
|
||||
properties:
|
||||
end:
|
||||
format: int64
|
||||
type: integer
|
||||
filter:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5Filter'
|
||||
groupBy:
|
||||
items:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
|
||||
nullable: true
|
||||
type: array
|
||||
limit:
|
||||
type: integer
|
||||
offset:
|
||||
type: integer
|
||||
orderBy:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
|
||||
start:
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- start
|
||||
- end
|
||||
- limit
|
||||
type: object
|
||||
InframonitoringtypesRequiredMetricsCheck:
|
||||
properties:
|
||||
missingMetrics:
|
||||
@@ -10886,7 +11003,9 @@ paths:
|
||||
five metrics, and pagination via offset/limit. The response type is ''list''
|
||||
for the default host.name grouping or ''grouped_list'' for custom groupBy
|
||||
keys. Also reports missing required metrics and whether the requested time
|
||||
range falls before the data retention boundary.'
|
||||
range falls before the data retention boundary. Numeric metric fields (cpu,
|
||||
memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
operationId: ListHosts
|
||||
requestBody:
|
||||
content:
|
||||
@@ -10940,6 +11059,79 @@ paths:
|
||||
summary: List Hosts for Infra Monitoring
|
||||
tags:
|
||||
- inframonitoring
|
||||
/api/v2/infra_monitoring/pods:
|
||||
post:
|
||||
deprecated: false
|
||||
description: 'Returns a paginated list of Kubernetes pods with key metrics:
|
||||
CPU usage, CPU request/limit utilization, memory working set, memory request/limit
|
||||
utilization, current pod phase (pending/running/succeeded/failed/unknown),
|
||||
and pod age (ms since start time). Each pod includes metadata attributes (namespace,
|
||||
node, workload owner such as deployment/statefulset/daemonset/job/cronjob,
|
||||
cluster). Supports filtering via a filter expression, custom groupBy to aggregate
|
||||
pods by any attribute, ordering by any of the six metrics (cpu, cpu_request,
|
||||
cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit.
|
||||
The response type is ''list'' for the default k8s.pod.uid grouping (each row
|
||||
is one pod with its current phase) or ''grouped_list'' for custom groupBy
|
||||
keys (each row aggregates pods in the group with per-phase counts: pendingPodCount,
|
||||
runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived
|
||||
from each pod''s latest phase in the window). Also reports missing required
|
||||
metrics and whether the requested time range falls before the data retention
|
||||
boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory,
|
||||
podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no
|
||||
data is available for that field.'
|
||||
operationId: ListPods
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPostablePods'
|
||||
responses:
|
||||
"200":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
properties:
|
||||
data:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPods'
|
||||
status:
|
||||
type: string
|
||||
required:
|
||||
- status
|
||||
- data
|
||||
type: object
|
||||
description: OK
|
||||
"400":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Bad Request
|
||||
"401":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Unauthorized
|
||||
"403":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Forbidden
|
||||
"500":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Internal Server Error
|
||||
security:
|
||||
- api_key:
|
||||
- VIEWER
|
||||
- tokenizer:
|
||||
- VIEWER
|
||||
summary: List Pods for Infra Monitoring
|
||||
tags:
|
||||
- inframonitoring
|
||||
/api/v2/livez:
|
||||
get:
|
||||
deprecated: false
|
||||
|
||||
@@ -13,7 +13,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
baserules "github.com/SigNoz/signoz/pkg/query-service/rules"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error) {
|
||||
@@ -49,7 +48,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, tr)
|
||||
|
||||
// create ch rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
|
||||
|
||||
@@ -73,7 +72,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, pr)
|
||||
|
||||
// create promql rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeProm, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeAnomaly {
|
||||
// create anomaly rule
|
||||
@@ -96,7 +95,7 @@ func PrepareTaskFunc(opts baserules.PrepareTaskOptions) (baserules.Task, error)
|
||||
rules = append(rules, ar)
|
||||
|
||||
// create anomaly rule task for evaluation
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(baserules.TaskTypeCh, opts.TaskName, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
@@ -210,9 +209,9 @@ func TestNotification(opts baserules.PrepareTestRuleOptions) (int, error) {
|
||||
}
|
||||
|
||||
// newTask returns an appropriate group for the rule type
|
||||
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) baserules.Task {
|
||||
func newTask(taskType baserules.TaskType, name string, frequency time.Duration, rules []baserules.Rule, opts *baserules.ManagerOptions, notify baserules.NotifyFunc) baserules.Task {
|
||||
if taskType == baserules.TaskTypeCh {
|
||||
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return baserules.NewRuleTask(name, "", frequency, rules, opts, notify)
|
||||
}
|
||||
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return baserules.NewPromRuleTask(name, "", frequency, rules, opts, notify)
|
||||
}
|
||||
|
||||
@@ -13,7 +13,9 @@ import type {
|
||||
|
||||
import type {
|
||||
InframonitoringtypesPostableHostsDTO,
|
||||
InframonitoringtypesPostablePodsDTO,
|
||||
ListHosts200,
|
||||
ListPods200,
|
||||
RenderErrorResponseDTO,
|
||||
} from '../sigNoz.schemas';
|
||||
|
||||
@@ -21,7 +23,7 @@ import { GeneratedAPIInstance } from '../../../generatedAPIInstance';
|
||||
import type { ErrorType, BodyType } from '../../../generatedAPIInstance';
|
||||
|
||||
/**
|
||||
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary.
|
||||
* Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Hosts for Infra Monitoring
|
||||
*/
|
||||
export const listHosts = (
|
||||
@@ -104,3 +106,87 @@ export const useListHosts = <
|
||||
|
||||
return useMutation(mutationOptions);
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts: pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Pods for Infra Monitoring
|
||||
*/
|
||||
export const listPods = (
|
||||
inframonitoringtypesPostablePodsDTO: BodyType<InframonitoringtypesPostablePodsDTO>,
|
||||
signal?: AbortSignal,
|
||||
) => {
|
||||
return GeneratedAPIInstance<ListPods200>({
|
||||
url: `/api/v2/infra_monitoring/pods`,
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
data: inframonitoringtypesPostablePodsDTO,
|
||||
signal,
|
||||
});
|
||||
};
|
||||
|
||||
export const getListPodsMutationOptions = <
|
||||
TError = ErrorType<RenderErrorResponseDTO>,
|
||||
TContext = unknown,
|
||||
>(options?: {
|
||||
mutation?: UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listPods>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostablePodsDTO> },
|
||||
TContext
|
||||
>;
|
||||
}): UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listPods>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostablePodsDTO> },
|
||||
TContext
|
||||
> => {
|
||||
const mutationKey = ['listPods'];
|
||||
const { mutation: mutationOptions } = options
|
||||
? options.mutation &&
|
||||
'mutationKey' in options.mutation &&
|
||||
options.mutation.mutationKey
|
||||
? options
|
||||
: { ...options, mutation: { ...options.mutation, mutationKey } }
|
||||
: { mutation: { mutationKey } };
|
||||
|
||||
const mutationFn: MutationFunction<
|
||||
Awaited<ReturnType<typeof listPods>>,
|
||||
{ data: BodyType<InframonitoringtypesPostablePodsDTO> }
|
||||
> = (props) => {
|
||||
const { data } = props ?? {};
|
||||
|
||||
return listPods(data);
|
||||
};
|
||||
|
||||
return { mutationFn, ...mutationOptions };
|
||||
};
|
||||
|
||||
export type ListPodsMutationResult = NonNullable<
|
||||
Awaited<ReturnType<typeof listPods>>
|
||||
>;
|
||||
export type ListPodsMutationBody =
|
||||
BodyType<InframonitoringtypesPostablePodsDTO>;
|
||||
export type ListPodsMutationError = ErrorType<RenderErrorResponseDTO>;
|
||||
|
||||
/**
|
||||
* @summary List Pods for Infra Monitoring
|
||||
*/
|
||||
export const useListPods = <
|
||||
TError = ErrorType<RenderErrorResponseDTO>,
|
||||
TContext = unknown,
|
||||
>(options?: {
|
||||
mutation?: UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listPods>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostablePodsDTO> },
|
||||
TContext
|
||||
>;
|
||||
}): UseMutationResult<
|
||||
Awaited<ReturnType<typeof listPods>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostablePodsDTO> },
|
||||
TContext
|
||||
> => {
|
||||
const mutationOptions = getListPodsMutationOptions(options);
|
||||
|
||||
return useMutation(mutationOptions);
|
||||
};
|
||||
|
||||
@@ -3243,6 +3243,108 @@ export interface InframonitoringtypesHostsDTO {
|
||||
warning?: Querybuildertypesv5QueryWarnDataDTO;
|
||||
}
|
||||
|
||||
export enum InframonitoringtypesPodPhaseDTO {
|
||||
pending = 'pending',
|
||||
running = 'running',
|
||||
succeeded = 'succeeded',
|
||||
failed = 'failed',
|
||||
unknown = 'unknown',
|
||||
'' = '',
|
||||
}
|
||||
/**
|
||||
* @nullable
|
||||
*/
|
||||
export type InframonitoringtypesPodRecordDTOMeta = {
|
||||
[key: string]: unknown;
|
||||
} | null;
|
||||
|
||||
export interface InframonitoringtypesPodRecordDTO {
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
failedPodCount: number;
|
||||
/**
|
||||
* @type object
|
||||
* @nullable true
|
||||
*/
|
||||
meta: InframonitoringtypesPodRecordDTOMeta;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
pendingPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
*/
|
||||
podAge: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podCPU: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podCPULimit: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podCPURequest: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podMemory: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podMemoryLimit: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
podMemoryRequest: number;
|
||||
podPhase: InframonitoringtypesPodPhaseDTO;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
podUID: string;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
runningPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
succeededPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
unknownPodCount: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPodsDTO {
|
||||
/**
|
||||
* @type boolean
|
||||
*/
|
||||
endTimeBeforeRetention: boolean;
|
||||
/**
|
||||
* @type array
|
||||
* @nullable true
|
||||
*/
|
||||
records: InframonitoringtypesPodRecordDTO[] | null;
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
total: number;
|
||||
type: InframonitoringtypesResponseTypeDTO;
|
||||
warning?: Querybuildertypesv5QueryWarnDataDTO;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPostableHostsDTO {
|
||||
/**
|
||||
* @type integer
|
||||
@@ -3271,6 +3373,34 @@ export interface InframonitoringtypesPostableHostsDTO {
|
||||
start: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPostablePodsDTO {
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
*/
|
||||
end: number;
|
||||
filter?: Querybuildertypesv5FilterDTO;
|
||||
/**
|
||||
* @type array
|
||||
* @nullable true
|
||||
*/
|
||||
groupBy?: Querybuildertypesv5GroupByKeyDTO[] | null;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
limit: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
offset?: number;
|
||||
orderBy?: Querybuildertypesv5OrderByDTO;
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
*/
|
||||
start: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesRequiredMetricsCheckDTO {
|
||||
/**
|
||||
* @type array
|
||||
@@ -7350,6 +7480,14 @@ export type ListHosts200 = {
|
||||
status: string;
|
||||
};
|
||||
|
||||
export type ListPods200 = {
|
||||
data: InframonitoringtypesPodsDTO;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
status: string;
|
||||
};
|
||||
|
||||
export type Livez200 = {
|
||||
data: FactoryResponseDTO;
|
||||
/**
|
||||
|
||||
@@ -139,8 +139,8 @@ function ChartPreview({
|
||||
if (startTime && endTime && startTime !== endTime) {
|
||||
dispatch(
|
||||
UpdateTimeInterval('custom', [
|
||||
parseInt(getTimeString(startTime), 10),
|
||||
parseInt(getTimeString(endTime), 10),
|
||||
Number.parseInt(getTimeString(startTime), 10),
|
||||
Number.parseInt(getTimeString(endTime), 10),
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -370,7 +370,7 @@ function FormAlertRules({
|
||||
// onQueryCategoryChange handles changes to query category
|
||||
// in state as well as sets additional defaults
|
||||
const onQueryCategoryChange = (val: EQueryType): void => {
|
||||
const element = document.getElementById('top');
|
||||
const element = document.querySelector('#top');
|
||||
if (element) {
|
||||
element.scrollIntoView({ behavior: 'smooth' });
|
||||
}
|
||||
|
||||
@@ -279,7 +279,7 @@ function Explorer(): JSX.Element {
|
||||
[],
|
||||
);
|
||||
|
||||
const [warning, setWarning] = useState<Warning | undefined>(undefined);
|
||||
const [warning, setWarning] = useState<Warning | undefined>();
|
||||
|
||||
const oneChartPerQueryDisabledTooltip = useMemo(() => {
|
||||
if (splitedQueries.length <= 1) {
|
||||
@@ -291,7 +291,7 @@ function Explorer(): JSX.Element {
|
||||
if (disableOneChartPerQuery) {
|
||||
return 'One chart per query cannot be disabled for multiple queries with different units.';
|
||||
}
|
||||
return undefined;
|
||||
return;
|
||||
}, [disableOneChartPerQuery, splitedQueries.length, units.length]);
|
||||
|
||||
// Show the y axis unit selector if -
|
||||
|
||||
@@ -217,7 +217,7 @@ function Inspect({
|
||||
);
|
||||
}
|
||||
|
||||
if (!inspectMetricsTimeSeries.length) {
|
||||
if (inspectMetricsTimeSeries.length === 0) {
|
||||
return renderFallback(
|
||||
'inspect-metrics-empty',
|
||||
<Empty description="No time series found for this metric to inspect." />,
|
||||
|
||||
@@ -254,10 +254,10 @@ export function useInspectMetrics(
|
||||
const valuesMap = new Map<number, number>();
|
||||
|
||||
series.values.forEach(({ timestamp, value }) => {
|
||||
valuesMap.set(timestamp, parseFloat(value));
|
||||
valuesMap.set(timestamp, Number.parseFloat(value));
|
||||
});
|
||||
|
||||
return timestamps.map((timestamp) => valuesMap.get(timestamp) ?? NaN);
|
||||
return timestamps.map((timestamp) => valuesMap.get(timestamp) ?? Number.NaN);
|
||||
});
|
||||
|
||||
const rawData = [timestamps, ...timeseriesArray];
|
||||
@@ -271,7 +271,7 @@ export function useInspectMetrics(
|
||||
labels.add(label);
|
||||
});
|
||||
});
|
||||
return Array.from(labels);
|
||||
return [...labels];
|
||||
}, [inspectMetricsData]);
|
||||
|
||||
const reset = useCallback(() => {
|
||||
|
||||
@@ -115,8 +115,10 @@ export const useGetQueryRange: UseGetQueryRange = (
|
||||
|
||||
const updatedQuery = updateBarStepInterval(
|
||||
requestData.query,
|
||||
requestData.start ? requestData.start * 1e3 : parseInt(start, 10) * 1e3,
|
||||
requestData.end ? requestData.end * 1e3 : parseInt(end, 10) * 1e3,
|
||||
requestData.start
|
||||
? requestData.start * 1e3
|
||||
: Number.parseInt(start, 10) * 1e3,
|
||||
requestData.end ? requestData.end * 1e3 : Number.parseInt(end, 10) * 1e3,
|
||||
);
|
||||
|
||||
return {
|
||||
|
||||
@@ -98,7 +98,7 @@ function LogsExplorer(): JSX.Element {
|
||||
setIsLoadingQueries(false);
|
||||
}, [queryClient]);
|
||||
|
||||
const [warning, setWarning] = useState<Warning | undefined>(undefined);
|
||||
const [warning, setWarning] = useState<Warning | undefined>();
|
||||
|
||||
const handleChangeSelectedView = useCallback(
|
||||
(view: ExplorerViews, querySearchParameters?: ICurrentQueryData): void => {
|
||||
|
||||
@@ -101,7 +101,7 @@ function TracesExplorer(): JSX.Element {
|
||||
getExplorerViewFromUrl(searchParams, panelTypesFromUrl),
|
||||
);
|
||||
|
||||
const [warning, setWarning] = useState<Warning | undefined>(undefined);
|
||||
const [warning, setWarning] = useState<Warning | undefined>();
|
||||
const [isOpen, setOpen] = useState<boolean>(true);
|
||||
|
||||
const defaultQuery = useMemo(
|
||||
|
||||
94
pkg/alertmanager/alertmanagerserver/maintenance_muter.go
Normal file
94
pkg/alertmanager/alertmanagerserver/maintenance_muter.go
Normal file
@@ -0,0 +1,94 @@
|
||||
package alertmanagerserver
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/common/model"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
// MaintenanceMuter implements types.Muter for maintenance windows.
|
||||
// It suppresses alerts whose ruleId label matches an active maintenance schedule.
|
||||
// Results are cached for cacheTTL to avoid a DB query on every per-alert check.
|
||||
type MaintenanceMuter struct {
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID string
|
||||
logger *slog.Logger
|
||||
|
||||
mu sync.RWMutex
|
||||
cached []*ruletypes.PlannedMaintenance
|
||||
cacheExpiry time.Time
|
||||
}
|
||||
|
||||
const maintenanceCacheTTL = 30 * time.Second
|
||||
|
||||
func NewMaintenanceMuter(store ruletypes.MaintenanceStore, orgID string, logger *slog.Logger) *MaintenanceMuter {
|
||||
return &MaintenanceMuter{
|
||||
maintenanceStore: store,
|
||||
orgID: orgID,
|
||||
logger: logger,
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MaintenanceMuter) Mutes(ctx context.Context, lset model.LabelSet) bool {
|
||||
ruleID := string(lset[ruletypes.AlertRuleIDLabel])
|
||||
if ruleID == "" {
|
||||
return false
|
||||
}
|
||||
now := time.Now()
|
||||
for _, mw := range m.getMaintenances(ctx) {
|
||||
if mw.ShouldSkip(ruleID, now) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (m *MaintenanceMuter) getMaintenances(ctx context.Context) []*ruletypes.PlannedMaintenance {
|
||||
m.mu.RLock()
|
||||
if time.Now().Before(m.cacheExpiry) {
|
||||
cached := m.cached
|
||||
m.mu.RUnlock()
|
||||
return cached
|
||||
}
|
||||
m.mu.RUnlock()
|
||||
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
// Double-check after acquiring write lock.
|
||||
if time.Now().Before(m.cacheExpiry) {
|
||||
return m.cached
|
||||
}
|
||||
|
||||
mws, err := m.maintenanceStore.ListPlannedMaintenance(ctx, m.orgID)
|
||||
if err != nil {
|
||||
m.logger.ErrorContext(ctx, "failed to list planned maintenance windows; alerts will not be suppressed", slog.String("org_id", m.orgID))
|
||||
return m.cached // return stale (potentially empty) cache on error
|
||||
}
|
||||
m.cached = mws
|
||||
m.cacheExpiry = time.Now().Add(maintenanceCacheTTL)
|
||||
return m.cached
|
||||
}
|
||||
|
||||
// maintenanceMuteStage wraps MaintenanceMuter as a notify.Stage.
|
||||
// We implement the stage directly rather than using notify.NewMuteStage to avoid
|
||||
// a dependency on the unexported *notify.Metrics field of PipelineBuilder.
|
||||
type maintenanceMuteStage struct {
|
||||
muter *MaintenanceMuter
|
||||
}
|
||||
|
||||
func (s *maintenanceMuteStage) Exec(ctx context.Context, _ *slog.Logger, alerts ...*types.Alert) (context.Context, []*types.Alert, error) {
|
||||
filtered := make([]*types.Alert, 0, len(alerts))
|
||||
for _, a := range alerts {
|
||||
if !s.muter.Mutes(ctx, a.Labels) {
|
||||
filtered = append(filtered, a)
|
||||
}
|
||||
}
|
||||
return ctx, filtered, nil
|
||||
}
|
||||
120
pkg/alertmanager/alertmanagerserver/pipeline_builder.go
Normal file
120
pkg/alertmanager/alertmanagerserver/pipeline_builder.go
Normal file
@@ -0,0 +1,120 @@
|
||||
// Copyright (c) 2026 SigNoz, Inc.
|
||||
// Copyright 2015 Prometheus Team
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package alertmanagerserver
|
||||
|
||||
// pipelineBuilder is a local copy of notify.PipelineBuilder that injects
|
||||
// the maintenance mute stage immediately before the receiver stage.
|
||||
//
|
||||
// We maintain our own copy so we can control exactly where in the pipeline
|
||||
// the maintenance stage runs (between the silence stage and the receiver),
|
||||
// which is not possible by wrapping the output of the upstream builder.
|
||||
//
|
||||
// Upstream pipeline order (notify.PipelineBuilder.New, notify.go:444):
|
||||
//
|
||||
// GossipSettle → Inhibit → TimeActive → TimeMute → Silence → [mms] → Receiver
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/prometheus/alertmanager/featurecontrol"
|
||||
"github.com/prometheus/alertmanager/inhibit"
|
||||
"github.com/prometheus/alertmanager/nflog/nflogpb"
|
||||
"github.com/prometheus/alertmanager/notify"
|
||||
"github.com/prometheus/alertmanager/silence"
|
||||
"github.com/prometheus/alertmanager/timeinterval"
|
||||
"github.com/prometheus/alertmanager/types"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
type pipelineBuilder struct {
|
||||
metrics *notify.Metrics
|
||||
ff featurecontrol.Flagger
|
||||
muter *MaintenanceMuter
|
||||
}
|
||||
|
||||
func newPipelineBuilder(
|
||||
r prometheus.Registerer,
|
||||
ff featurecontrol.Flagger,
|
||||
muter *MaintenanceMuter,
|
||||
) *pipelineBuilder {
|
||||
return &pipelineBuilder{
|
||||
metrics: notify.NewMetrics(r, ff),
|
||||
ff: ff,
|
||||
muter: muter,
|
||||
}
|
||||
}
|
||||
|
||||
// New returns a map of receivers to Stages, mirroring notify.PipelineBuilder.New
|
||||
// but inserting a maintenanceMuteStage between the silence stage and the receiver.
|
||||
func (pb *pipelineBuilder) New(
|
||||
receivers map[string][]notify.Integration,
|
||||
wait func() time.Duration,
|
||||
inhibitor *inhibit.Inhibitor,
|
||||
silencer *silence.Silencer,
|
||||
intervener *timeinterval.Intervener,
|
||||
marker types.GroupMarker,
|
||||
notificationLog notify.NotificationLog,
|
||||
peer notify.Peer,
|
||||
) notify.RoutingStage {
|
||||
rs := make(notify.RoutingStage, len(receivers))
|
||||
|
||||
ms := notify.NewGossipSettleStage(peer)
|
||||
is := notify.NewMuteStage(inhibitor, pb.metrics)
|
||||
tas := notify.NewTimeActiveStage(intervener, marker, pb.metrics)
|
||||
tms := notify.NewTimeMuteStage(intervener, marker, pb.metrics)
|
||||
ss := notify.NewMuteStage(silencer, pb.metrics)
|
||||
|
||||
var mms *maintenanceMuteStage
|
||||
if pb.muter != nil {
|
||||
mms = &maintenanceMuteStage{muter: pb.muter}
|
||||
}
|
||||
|
||||
for name := range receivers {
|
||||
stages := notify.MultiStage{ms, is, tas, tms, ss}
|
||||
if mms != nil {
|
||||
stages = append(stages, mms)
|
||||
}
|
||||
stages = append(stages, createReceiverStage(name, receivers[name], wait, notificationLog, pb.metrics))
|
||||
rs[name] = stages
|
||||
}
|
||||
|
||||
pb.metrics.InitializeFor(receivers)
|
||||
return rs
|
||||
}
|
||||
|
||||
// createReceiverStage is a copy of notify.createReceiverStage (unexported upstream).
|
||||
func createReceiverStage(
|
||||
name string,
|
||||
integrations []notify.Integration,
|
||||
wait func() time.Duration,
|
||||
notificationLog notify.NotificationLog,
|
||||
metrics *notify.Metrics,
|
||||
) notify.Stage {
|
||||
var fs notify.FanoutStage
|
||||
for i := range integrations {
|
||||
recv := &nflogpb.Receiver{
|
||||
GroupName: name,
|
||||
Integration: integrations[i].Name(),
|
||||
Idx: uint32(integrations[i].Index()),
|
||||
}
|
||||
var s notify.MultiStage
|
||||
s = append(s, notify.NewWaitStage(wait))
|
||||
s = append(s, notify.NewDedupStage(&integrations[i], notificationLog, recv))
|
||||
s = append(s, notify.NewRetryStage(integrations[i], name, metrics))
|
||||
s = append(s, notify.NewSetNotifiesStage(notificationLog, recv))
|
||||
fs = append(fs, s)
|
||||
}
|
||||
return fs
|
||||
}
|
||||
@@ -26,14 +26,13 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
var (
|
||||
// This is not a real file and will never be used. We need this placeholder to ensure maintenance runs on shutdown. See
|
||||
// https://github.com/prometheus/server/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
|
||||
// and https://github.com/prometheus/server/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
|
||||
snapfnoop string = "snapfnoop"
|
||||
)
|
||||
// This is not a real snapshot file and will never be used. We need this placeholder to ensure maintenance runs on shutdown.
|
||||
// See https://github.com/prometheus/alertmanager/blob/3ee2cd0f1271e277295c02b6160507b4d193dde2/silence/silence.go#L435-L438
|
||||
// and https://github.com/prometheus/alertmanager/blob/3b06b97af4d146e141af92885a185891eb79a5b0/nflog/nflog.go#L362.
|
||||
var snapfnoop string = "snapfnoop"
|
||||
|
||||
type Server struct {
|
||||
// logger is the logger for the alertmanager
|
||||
@@ -63,7 +62,7 @@ type Server struct {
|
||||
silencer *silence.Silencer
|
||||
silences *silence.Silences
|
||||
timeIntervals map[string][]timeinterval.TimeInterval
|
||||
pipelineBuilder *notify.PipelineBuilder
|
||||
pipelineBuilder *pipelineBuilder
|
||||
marker *alertmanagertypes.MemMarker
|
||||
tmpl *template.Template
|
||||
wg sync.WaitGroup
|
||||
@@ -71,7 +70,16 @@ type Server struct {
|
||||
notificationManager nfmanager.NotificationManager
|
||||
}
|
||||
|
||||
func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registerer, srvConfig Config, orgID string, stateStore alertmanagertypes.StateStore, nfManager nfmanager.NotificationManager) (*Server, error) {
|
||||
func New(
|
||||
ctx context.Context,
|
||||
logger *slog.Logger,
|
||||
registry prometheus.Registerer,
|
||||
srvConfig Config,
|
||||
orgID string,
|
||||
stateStore alertmanagertypes.StateStore,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) (*Server, error) {
|
||||
server := &Server{
|
||||
logger: logger.With(slog.String("pkg", "go.signoz.io/pkg/alertmanager/alertmanagerserver")),
|
||||
registry: registry,
|
||||
@@ -160,7 +168,6 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
|
||||
|
||||
return c, server.stateStore.Set(ctx, storableSilences)
|
||||
})
|
||||
|
||||
}()
|
||||
|
||||
// Start maintenance for notification logs
|
||||
@@ -196,7 +203,11 @@ func New(ctx context.Context, logger *slog.Logger, registry prometheus.Registere
|
||||
return nil, err
|
||||
}
|
||||
|
||||
server.pipelineBuilder = notify.NewPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{})
|
||||
var muter *MaintenanceMuter
|
||||
if maintenanceStore != nil {
|
||||
muter = NewMaintenanceMuter(maintenanceStore, orgID, server.logger)
|
||||
}
|
||||
server.pipelineBuilder = newPipelineBuilder(signozRegisterer, featurecontrol.NoopFlags{}, muter)
|
||||
server.dispatcherMetrics = NewDispatcherMetrics(false, signozRegisterer)
|
||||
|
||||
return server, nil
|
||||
|
||||
@@ -90,7 +90,7 @@ func TestEndToEndAlertManagerFlow(t *testing.T) {
|
||||
stateStore := alertmanagertypestest.NewStateStore()
|
||||
registry := prometheus.NewRegistry()
|
||||
logger := slog.New(slog.DiscardHandler)
|
||||
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager)
|
||||
server, err := New(context.Background(), logger, registry, srvCfg, orgID, stateStore, notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, orgID)
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -25,7 +25,7 @@ import (
|
||||
|
||||
func TestServerSetConfigAndStop(t *testing.T) {
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
|
||||
@@ -37,7 +37,7 @@ func TestServerSetConfigAndStop(t *testing.T) {
|
||||
|
||||
func TestServerTestReceiverTypeWebhook(t *testing.T) {
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), NewConfig(), "1", alertmanagertypestest.NewStateStore(), notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(alertmanagertypes.GlobalConfig{}, alertmanagertypes.RouteConfig{GroupInterval: 1 * time.Minute, RepeatInterval: 1 * time.Minute, GroupWait: 1 * time.Minute}, "1")
|
||||
@@ -85,7 +85,7 @@ func TestServerPutAlerts(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
@@ -133,7 +133,7 @@ func TestServerTestAlert(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
@@ -238,7 +238,7 @@ func TestServerTestAlertContinuesOnFailure(t *testing.T) {
|
||||
srvCfg := NewConfig()
|
||||
srvCfg.Route.GroupInterval = 1 * time.Second
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager)
|
||||
server, err := New(context.Background(), slog.New(slog.DiscardHandler), prometheus.NewRegistry(), srvCfg, "1", stateStore, notificationManager, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
amConfig, err := alertmanagertypes.NewDefaultConfig(srvCfg.Global, srvCfg.Route, "1")
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/factory"
|
||||
"github.com/SigNoz/signoz/pkg/modules/organization"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
)
|
||||
|
||||
type Service struct {
|
||||
@@ -39,16 +40,18 @@ type Service struct {
|
||||
serversMtx sync.RWMutex
|
||||
|
||||
notificationManager nfmanager.NotificationManager
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
}
|
||||
|
||||
func New(
|
||||
ctx context.Context,
|
||||
settings factory.ScopedProviderSettings,
|
||||
config alertmanagerserver.Config,
|
||||
stateStore alertmanagertypes.StateStore,
|
||||
configStore alertmanagertypes.ConfigStore,
|
||||
orgGetter organization.Getter,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) *Service {
|
||||
service := &Service{
|
||||
config: config,
|
||||
@@ -59,6 +62,7 @@ func New(
|
||||
servers: make(map[string]*alertmanagerserver.Server),
|
||||
serversMtx: sync.RWMutex{},
|
||||
notificationManager: nfManager,
|
||||
maintenanceStore: maintenanceStore,
|
||||
}
|
||||
|
||||
return service
|
||||
@@ -177,7 +181,10 @@ func (service *Service) newServer(ctx context.Context, orgID string) (*alertmana
|
||||
return nil, err
|
||||
}
|
||||
|
||||
server, err := alertmanagerserver.New(ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID, service.stateStore, service.notificationManager)
|
||||
server, err := alertmanagerserver.New(
|
||||
ctx, service.settings.Logger(), service.settings.PrometheusRegisterer(), service.config, orgID,
|
||||
service.stateStore, service.notificationManager, service.maintenanceStore,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/types"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
@@ -30,35 +31,49 @@ type provider struct {
|
||||
configStore alertmanagertypes.ConfigStore
|
||||
stateStore alertmanagertypes.StateStore
|
||||
notificationManager nfmanager.NotificationManager
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
stopC chan struct{}
|
||||
}
|
||||
|
||||
func NewFactory(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
|
||||
func NewFactory(
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
notificationManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config] {
|
||||
return factory.NewProviderFactory(factory.MustNewName("signoz"), func(ctx context.Context, settings factory.ProviderSettings, config alertmanager.Config) (alertmanager.Alertmanager, error) {
|
||||
return New(ctx, settings, config, sqlstore, orgGetter, notificationManager)
|
||||
return New(settings, config, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
})
|
||||
}
|
||||
|
||||
func New(ctx context.Context, providerSettings factory.ProviderSettings, config alertmanager.Config, sqlstore sqlstore.SQLStore, orgGetter organization.Getter, notificationManager nfmanager.NotificationManager) (*provider, error) {
|
||||
func New(
|
||||
providerSettings factory.ProviderSettings,
|
||||
config alertmanager.Config,
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
notificationManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) (*provider, error) {
|
||||
settings := factory.NewScopedProviderSettings(providerSettings, "github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager")
|
||||
configStore := sqlalertmanagerstore.NewConfigStore(sqlstore)
|
||||
stateStore := sqlalertmanagerstore.NewStateStore(sqlstore)
|
||||
|
||||
p := &provider{
|
||||
service: alertmanager.New(
|
||||
ctx,
|
||||
settings,
|
||||
config.Signoz.Config,
|
||||
stateStore,
|
||||
configStore,
|
||||
orgGetter,
|
||||
notificationManager,
|
||||
maintenanceStore,
|
||||
),
|
||||
settings: settings,
|
||||
config: config,
|
||||
configStore: configStore,
|
||||
stateStore: stateStore,
|
||||
notificationManager: notificationManager,
|
||||
maintenanceStore: maintenanceStore,
|
||||
stopC: make(chan struct{}),
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListHosts",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Hosts for Infra Monitoring",
|
||||
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary.",
|
||||
Description: "Returns a paginated list of hosts with key infrastructure metrics: CPU usage (%), memory usage (%), I/O wait (%), disk usage (%), and 15-minute load average. Each host includes its current status (active/inactive based on metrics reported in the last 10 minutes) and metadata attributes (e.g., os.type). Supports filtering via a filter expression, filtering by host status, custom groupBy to aggregate hosts by any attribute, ordering by any of the five metrics, and pagination via offset/limit. The response type is 'list' for the default host.name grouping or 'grouped_list' for custom groupBy keys. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (cpu, memory, wait, load15, diskUsage) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableHosts),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Hosts),
|
||||
@@ -29,5 +29,24 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := router.Handle("/api/v2/infra_monitoring/pods", handler.New(
|
||||
provider.authZ.ViewAccess(provider.infraMonitoringHandler.ListPods),
|
||||
handler.OpenAPIDef{
|
||||
ID: "ListPods",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Pods for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts: pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostablePods),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Pods),
|
||||
ResponseContentType: "application/json",
|
||||
SuccessStatusCode: http.StatusOK,
|
||||
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
|
||||
Deprecated: false,
|
||||
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
|
||||
})).Methods(http.MethodPost).GetError(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -45,3 +45,27 @@ func (h *handler) ListHosts(rw http.ResponseWriter, req *http.Request) {
|
||||
|
||||
render.Success(rw, http.StatusOK, result)
|
||||
}
|
||||
|
||||
func (h *handler) ListPods(rw http.ResponseWriter, req *http.Request) {
|
||||
claims, err := authtypes.ClaimsFromContext(req.Context())
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
orgID := valuer.MustNewUUID(claims.OrgID)
|
||||
|
||||
var parsedReq inframonitoringtypes.PostablePods
|
||||
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.module.ListPods(req.Context(), orgID, &parsedReq)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusOK, result)
|
||||
}
|
||||
|
||||
@@ -14,3 +14,12 @@ type groupHostStatusCounts struct {
|
||||
Active int
|
||||
Inactive int
|
||||
}
|
||||
|
||||
// podPhaseCounts holds per-group pod counts bucketed by latest phase in window.
|
||||
type podPhaseCounts struct {
|
||||
Pending int
|
||||
Running int
|
||||
Succeeded int
|
||||
Failed int
|
||||
Unknown int
|
||||
}
|
||||
|
||||
@@ -159,3 +159,86 @@ func (m *module) ListHosts(ctx context.Context, orgID valuer.UUID, req *inframon
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (m *module) ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error) {
|
||||
if err := req.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := &inframonitoringtypes.Pods{}
|
||||
|
||||
if req.OrderBy == nil {
|
||||
req.OrderBy = &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: inframonitoringtypes.PodsOrderByCPU,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
}
|
||||
}
|
||||
|
||||
if len(req.GroupBy) == 0 {
|
||||
req.GroupBy = []qbtypes.GroupByKey{podUIDGroupByKey}
|
||||
resp.Type = inframonitoringtypes.ResponseTypeList
|
||||
} else {
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, podsTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.PodRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.PodRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getPodsTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Total = len(metadataMap)
|
||||
|
||||
pageGroups, err := m.getTopPodGroups(ctx, orgID, req, metadataMap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(pageGroups) == 0 {
|
||||
resp.Records = []inframonitoringtypes.PodRecord{}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if req.Filter != nil {
|
||||
filterExpr = req.Filter.Expression
|
||||
}
|
||||
|
||||
fullQueryReq := buildFullQueryRequest(req.Start, req.End, filterExpr, req.GroupBy, pageGroups, m.newPodsTableListQuery())
|
||||
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
phaseCounts, err := m.getPerGroupPodPhaseCounts(ctx, req, pageGroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
isPodUIDInGroupBy := isKeyInGroupByAttrs(req.GroupBy, podUIDAttrKey)
|
||||
resp.Records = buildPodRecords(isPodUIDInGroupBy, queryResp, pageGroups, req.GroupBy, metadataMap, phaseCounts, req.End)
|
||||
resp.Warning = queryResp.Warning
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
334
pkg/modules/inframonitoring/implinframonitoring/pods.go
Normal file
334
pkg/modules/inframonitoring/implinframonitoring/pods.go
Normal file
@@ -0,0 +1,334 @@
|
||||
package implinframonitoring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
|
||||
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/huandu/go-sqlbuilder"
|
||||
)
|
||||
|
||||
// buildPodRecords assembles the page records. Phase counts come from
|
||||
// phaseCounts in both modes. In list mode (isPodUIDInGroupBy=true) each
|
||||
// group is one pod, so exactly one count is 1; PodPhase is derived from
|
||||
// which one. In grouped_list mode PodPhase stays PodPhaseNone.
|
||||
func buildPodRecords(
|
||||
isPodUIDInGroupBy bool,
|
||||
resp *qbtypes.QueryRangeResponse,
|
||||
pageGroups []map[string]string,
|
||||
groupBy []qbtypes.GroupByKey,
|
||||
metadataMap map[string]map[string]string,
|
||||
phaseCounts map[string]podPhaseCounts,
|
||||
reqEnd int64,
|
||||
) []inframonitoringtypes.PodRecord {
|
||||
metricsMap := parseFullQueryResponse(resp, groupBy)
|
||||
|
||||
records := make([]inframonitoringtypes.PodRecord, 0, len(pageGroups))
|
||||
for _, labels := range pageGroups {
|
||||
compositeKey := compositeKeyFromLabels(labels, groupBy)
|
||||
podUID := labels[podUIDAttrKey]
|
||||
|
||||
record := inframonitoringtypes.PodRecord{ // initialize with default values
|
||||
PodUID: podUID,
|
||||
PodPhase: inframonitoringtypes.PodPhaseNone,
|
||||
PodCPU: -1,
|
||||
PodCPURequest: -1,
|
||||
PodCPULimit: -1,
|
||||
PodMemory: -1,
|
||||
PodMemoryRequest: -1,
|
||||
PodMemoryLimit: -1,
|
||||
PodAge: -1,
|
||||
Meta: map[string]any{},
|
||||
}
|
||||
|
||||
if metrics, ok := metricsMap[compositeKey]; ok {
|
||||
if v, exists := metrics["A"]; exists {
|
||||
record.PodCPU = v
|
||||
}
|
||||
if v, exists := metrics["B"]; exists {
|
||||
record.PodCPURequest = v
|
||||
}
|
||||
if v, exists := metrics["C"]; exists {
|
||||
record.PodCPULimit = v
|
||||
}
|
||||
if v, exists := metrics["D"]; exists {
|
||||
record.PodMemory = v
|
||||
}
|
||||
if v, exists := metrics["E"]; exists {
|
||||
record.PodMemoryRequest = v
|
||||
}
|
||||
if v, exists := metrics["F"]; exists {
|
||||
record.PodMemoryLimit = v
|
||||
}
|
||||
}
|
||||
|
||||
if phaseCountsForGroup, ok := phaseCounts[compositeKey]; ok {
|
||||
record.PendingPodCount = phaseCountsForGroup.Pending
|
||||
record.RunningPodCount = phaseCountsForGroup.Running
|
||||
record.SucceededPodCount = phaseCountsForGroup.Succeeded
|
||||
record.FailedPodCount = phaseCountsForGroup.Failed
|
||||
record.UnknownPodCount = phaseCountsForGroup.Unknown
|
||||
|
||||
// In list mode each group is one pod; the count==1 bucket identifies the phase.
|
||||
if isPodUIDInGroupBy {
|
||||
switch {
|
||||
case phaseCountsForGroup.Pending == 1:
|
||||
record.PodPhase = inframonitoringtypes.PodPhasePending
|
||||
case phaseCountsForGroup.Running == 1:
|
||||
record.PodPhase = inframonitoringtypes.PodPhaseRunning
|
||||
case phaseCountsForGroup.Succeeded == 1:
|
||||
record.PodPhase = inframonitoringtypes.PodPhaseSucceeded
|
||||
case phaseCountsForGroup.Failed == 1:
|
||||
record.PodPhase = inframonitoringtypes.PodPhaseFailed
|
||||
case phaseCountsForGroup.Unknown == 1:
|
||||
record.PodPhase = inframonitoringtypes.PodPhaseUnknown
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if attrs, ok := metadataMap[compositeKey]; ok && isPodUIDInGroupBy {
|
||||
// the condition above ensures we deduce age only if pod uid is in group by because if
|
||||
// it's not in group by then we might have multiple pod uids in the same group and hence then podAge wont make sense
|
||||
if startTimeStr, exists := attrs[podStartTimeAttrKey]; exists && startTimeStr != "" {
|
||||
if t, err := time.Parse(time.RFC3339, startTimeStr); err == nil {
|
||||
startTimeMs := t.UnixMilli()
|
||||
if startTimeMs > 0 {
|
||||
record.PodAge = reqEnd - startTimeMs
|
||||
}
|
||||
}
|
||||
}
|
||||
for k, v := range attrs {
|
||||
record.Meta[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
records = append(records, record)
|
||||
}
|
||||
return records
|
||||
}
|
||||
|
||||
func (m *module) getTopPodGroups(
|
||||
ctx context.Context,
|
||||
orgID valuer.UUID,
|
||||
req *inframonitoringtypes.PostablePods,
|
||||
metadataMap map[string]map[string]string,
|
||||
) ([]map[string]string, error) {
|
||||
orderByKey := req.OrderBy.Key.Name
|
||||
queryNamesForOrderBy := orderByToPodsQueryNames[orderByKey]
|
||||
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
|
||||
|
||||
topReq := &qbtypes.QueryRangeRequest{
|
||||
Start: uint64(req.Start),
|
||||
End: uint64(req.End),
|
||||
RequestType: qbtypes.RequestTypeScalar,
|
||||
CompositeQuery: qbtypes.CompositeQuery{
|
||||
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
|
||||
},
|
||||
}
|
||||
|
||||
for _, envelope := range m.newPodsTableListQuery().CompositeQuery.Queries {
|
||||
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
|
||||
continue
|
||||
}
|
||||
copied := envelope
|
||||
if copied.Type == qbtypes.QueryTypeBuilder {
|
||||
existingExpr := ""
|
||||
if f := copied.GetFilter(); f != nil {
|
||||
existingExpr = f.Expression
|
||||
}
|
||||
reqFilterExpr := ""
|
||||
if req.Filter != nil {
|
||||
reqFilterExpr = req.Filter.Expression
|
||||
}
|
||||
merged := mergeFilterExpressions(existingExpr, reqFilterExpr)
|
||||
copied.SetFilter(&qbtypes.Filter{Expression: merged})
|
||||
copied.SetGroupBy(req.GroupBy)
|
||||
}
|
||||
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
|
||||
}
|
||||
|
||||
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
|
||||
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
|
||||
}
|
||||
|
||||
func (m *module) getPodsTableMetadata(ctx context.Context, req *inframonitoringtypes.PostablePods) (map[string]map[string]string, error) {
|
||||
var nonGroupByAttrs []string
|
||||
for _, key := range podAttrKeysForMetadata {
|
||||
if !isKeyInGroupByAttrs(req.GroupBy, key) {
|
||||
nonGroupByAttrs = append(nonGroupByAttrs, key)
|
||||
}
|
||||
}
|
||||
return m.getMetadata(ctx, podsTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
|
||||
}
|
||||
|
||||
// getPerGroupPodPhaseCounts computes per-group pod counts bucketed by each
|
||||
// pod's latest phase in the requested window.
|
||||
// Pipeline:
|
||||
//
|
||||
// timeSeriesFPs: fp ↔ (pod_uid, groupBy cols) from the time_series table.
|
||||
// User filter + page-groups filter applied here.
|
||||
// latestPhasePerPod: INNER JOIN samples × timeSeriesFPs, collapsed to
|
||||
// the latest phase per pod via argMax(value, unix_milli).
|
||||
// countPodsPerPhase: per-group uniqExactIf into 5 phase buckets.
|
||||
//
|
||||
// Groups absent from the result map have implicit zero counts (caller default).
|
||||
func (m *module) getPerGroupPodPhaseCounts(
|
||||
ctx context.Context,
|
||||
req *inframonitoringtypes.PostablePods,
|
||||
pageGroups []map[string]string,
|
||||
) (map[string]podPhaseCounts, error) {
|
||||
if len(pageGroups) == 0 || len(req.GroupBy) == 0 {
|
||||
return map[string]podPhaseCounts{}, nil
|
||||
}
|
||||
|
||||
// Merged filter expression (user filter + page-groups IN clauses).
|
||||
reqFilterExpr := ""
|
||||
if req.Filter != nil {
|
||||
reqFilterExpr = req.Filter.Expression
|
||||
}
|
||||
pageGroupsFilterExpr := buildPageGroupsFilterExpr(pageGroups)
|
||||
filterExpr := mergeFilterExpressions(reqFilterExpr, pageGroupsFilterExpr)
|
||||
|
||||
// Resolve tables. Same convention as hosts (distributed names from helpers).
|
||||
adjustedStart, adjustedEnd, _, localTimeSeriesTable := telemetrymetrics.WhichTSTableToUse(
|
||||
uint64(req.Start), uint64(req.End), nil,
|
||||
)
|
||||
samplesTable := telemetrymetrics.WhichSamplesTableToUse(
|
||||
uint64(req.Start), uint64(req.End),
|
||||
metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil,
|
||||
)
|
||||
valueCol := telemetrymetrics.ValueColumnForSamplesTable(samplesTable)
|
||||
|
||||
// ----- timeSeriesFPs -----
|
||||
timeSeriesFPs := sqlbuilder.NewSelectBuilder()
|
||||
timeSeriesFPsSelectCols := []string{
|
||||
"fingerprint",
|
||||
fmt.Sprintf("JSONExtractString(labels, %s) AS pod_uid", timeSeriesFPs.Var(podUIDAttrKey)),
|
||||
}
|
||||
for _, key := range req.GroupBy {
|
||||
timeSeriesFPsSelectCols = append(timeSeriesFPsSelectCols,
|
||||
fmt.Sprintf("JSONExtractString(labels, %s) AS %s", timeSeriesFPs.Var(key.Name), quoteIdentifier(key.Name)),
|
||||
)
|
||||
}
|
||||
timeSeriesFPs.Select(timeSeriesFPsSelectCols...)
|
||||
timeSeriesFPs.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, localTimeSeriesTable))
|
||||
timeSeriesFPs.Where(
|
||||
timeSeriesFPs.E("metric_name", podPhaseMetricName),
|
||||
timeSeriesFPs.GE("unix_milli", adjustedStart),
|
||||
timeSeriesFPs.L("unix_milli", adjustedEnd),
|
||||
)
|
||||
if filterExpr != "" {
|
||||
filterClause, err := m.buildFilterClause(ctx, &qbtypes.Filter{Expression: filterExpr}, req.Start, req.End)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if filterClause != nil {
|
||||
timeSeriesFPs.AddWhereClause(filterClause)
|
||||
}
|
||||
}
|
||||
timeSeriesFPsGroupBy := []string{"fingerprint", "pod_uid"}
|
||||
for _, key := range req.GroupBy {
|
||||
timeSeriesFPsGroupBy = append(timeSeriesFPsGroupBy, quoteIdentifier(key.Name))
|
||||
}
|
||||
timeSeriesFPs.GroupBy(timeSeriesFPsGroupBy...)
|
||||
timeSeriesFPsSQL, timeSeriesFPsArgs := timeSeriesFPs.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
|
||||
latestPhasePerPod := sqlbuilder.NewSelectBuilder()
|
||||
latestPhasePerPodSelectCols := []string{"tsfp.pod_uid AS pod_uid"}
|
||||
latestPhasePerPodGroupBy := []string{"pod_uid"}
|
||||
for _, key := range req.GroupBy {
|
||||
col := quoteIdentifier(key.Name)
|
||||
latestPhasePerPodSelectCols = append(latestPhasePerPodSelectCols, fmt.Sprintf("tsfp.%s AS %s", col, col))
|
||||
latestPhasePerPodGroupBy = append(latestPhasePerPodGroupBy, col)
|
||||
}
|
||||
latestPhasePerPodSelectCols = append(latestPhasePerPodSelectCols,
|
||||
fmt.Sprintf("argMax(samples.%s, samples.unix_milli) AS phase_value", valueCol),
|
||||
)
|
||||
latestPhasePerPod.Select(latestPhasePerPodSelectCols...)
|
||||
latestPhasePerPod.From(fmt.Sprintf(
|
||||
"%s.%s AS samples INNER JOIN time_series_fps AS tsfp ON samples.fingerprint = tsfp.fingerprint",
|
||||
telemetrymetrics.DBName, samplesTable,
|
||||
))
|
||||
latestPhasePerPod.Where(
|
||||
latestPhasePerPod.E("samples.metric_name", podPhaseMetricName),
|
||||
latestPhasePerPod.GE("samples.unix_milli", req.Start),
|
||||
latestPhasePerPod.L("samples.unix_milli", req.End),
|
||||
"tsfp.pod_uid != ''",
|
||||
)
|
||||
latestPhasePerPod.GroupBy(latestPhasePerPodGroupBy...)
|
||||
latestPhasePerPodSQL, latestPhasePerPodArgs := latestPhasePerPod.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
|
||||
// ----- countPodsPerPhase (outer SELECT) -----
|
||||
countPodsPerPhaseSelectCols := make([]string, 0, len(req.GroupBy)+5)
|
||||
countPodsPerPhaseGroupBy := make([]string, 0, len(req.GroupBy))
|
||||
for _, key := range req.GroupBy {
|
||||
col := quoteIdentifier(key.Name)
|
||||
countPodsPerPhaseSelectCols = append(countPodsPerPhaseSelectCols, col)
|
||||
countPodsPerPhaseGroupBy = append(countPodsPerPhaseGroupBy, col)
|
||||
}
|
||||
countPodsPerPhaseSelectCols = append(countPodsPerPhaseSelectCols,
|
||||
fmt.Sprintf("uniqExactIf(pod_uid, phase_value = %d) AS pending_count", inframonitoringtypes.PodPhaseNumPending),
|
||||
fmt.Sprintf("uniqExactIf(pod_uid, phase_value = %d) AS running_count", inframonitoringtypes.PodPhaseNumRunning),
|
||||
fmt.Sprintf("uniqExactIf(pod_uid, phase_value = %d) AS succeeded_count", inframonitoringtypes.PodPhaseNumSucceeded),
|
||||
fmt.Sprintf("uniqExactIf(pod_uid, phase_value = %d) AS failed_count", inframonitoringtypes.PodPhaseNumFailed),
|
||||
fmt.Sprintf("uniqExactIf(pod_uid, phase_value = %d) AS unknown_count", inframonitoringtypes.PodPhaseNumUnknown),
|
||||
)
|
||||
countPodsPerPhaseSQL := fmt.Sprintf(
|
||||
"SELECT %s FROM latest_phase_per_pod GROUP BY %s",
|
||||
strings.Join(countPodsPerPhaseSelectCols, ", "),
|
||||
strings.Join(countPodsPerPhaseGroupBy, ", "),
|
||||
)
|
||||
|
||||
// Combine CTEs + outer.
|
||||
cteFragments := []string{
|
||||
fmt.Sprintf("time_series_fps AS (%s)", timeSeriesFPsSQL),
|
||||
fmt.Sprintf("latest_phase_per_pod AS (%s)", latestPhasePerPodSQL),
|
||||
}
|
||||
finalSQL := querybuilder.CombineCTEs(cteFragments) + countPodsPerPhaseSQL
|
||||
finalArgs := querybuilder.PrependArgs([][]any{timeSeriesFPsArgs, latestPhasePerPodArgs}, nil)
|
||||
|
||||
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, finalSQL, finalArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
result := make(map[string]podPhaseCounts)
|
||||
for rows.Next() {
|
||||
groupVals := make([]string, len(req.GroupBy))
|
||||
scanPtrs := make([]any, 0, len(req.GroupBy)+5)
|
||||
for i := range groupVals {
|
||||
scanPtrs = append(scanPtrs, &groupVals[i])
|
||||
}
|
||||
var pending, running, succeeded, failed, unknown uint64
|
||||
scanPtrs = append(scanPtrs, &pending, &running, &succeeded, &failed, &unknown)
|
||||
|
||||
if err := rows.Scan(scanPtrs...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result[compositeKeyFromList(groupVals)] = podPhaseCounts{
|
||||
Pending: int(pending),
|
||||
Running: int(running),
|
||||
Succeeded: int(succeeded),
|
||||
Failed: int(failed),
|
||||
Unknown: int(unknown),
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -0,0 +1,178 @@
|
||||
package implinframonitoring
|
||||
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
)
|
||||
|
||||
const (
|
||||
podUIDAttrKey = "k8s.pod.uid"
|
||||
podStartTimeAttrKey = "k8s.pod.start_time"
|
||||
podPhaseMetricName = "k8s.pod.phase"
|
||||
)
|
||||
|
||||
var podUIDGroupByKey = qbtypes.GroupByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: podUIDAttrKey,
|
||||
FieldContext: telemetrytypes.FieldContextResource,
|
||||
FieldDataType: telemetrytypes.FieldDataTypeString,
|
||||
},
|
||||
}
|
||||
|
||||
var podsTableMetricNamesList = []string{
|
||||
"k8s.pod.cpu.usage",
|
||||
"k8s.pod.cpu_request_utilization",
|
||||
"k8s.pod.cpu_limit_utilization",
|
||||
"k8s.pod.memory.working_set",
|
||||
"k8s.pod.memory_request_utilization",
|
||||
"k8s.pod.memory_limit_utilization",
|
||||
"k8s.pod.phase",
|
||||
}
|
||||
|
||||
var podAttrKeysForMetadata = []string{
|
||||
"k8s.pod.uid",
|
||||
"k8s.pod.name",
|
||||
"k8s.namespace.name",
|
||||
"k8s.node.name",
|
||||
"k8s.deployment.name",
|
||||
"k8s.statefulset.name",
|
||||
"k8s.daemonset.name",
|
||||
"k8s.job.name",
|
||||
"k8s.cronjob.name",
|
||||
"k8s.cluster.name",
|
||||
"k8s.pod.start_time",
|
||||
}
|
||||
|
||||
var orderByToPodsQueryNames = map[string][]string{
|
||||
inframonitoringtypes.PodsOrderByCPU: {"A"},
|
||||
inframonitoringtypes.PodsOrderByCPURequest: {"B"},
|
||||
inframonitoringtypes.PodsOrderByCPULimit: {"C"},
|
||||
inframonitoringtypes.PodsOrderByMemory: {"D"},
|
||||
inframonitoringtypes.PodsOrderByMemoryRequest: {"E"},
|
||||
inframonitoringtypes.PodsOrderByMemoryLimit: {"F"},
|
||||
}
|
||||
|
||||
// newPodsTableListQuery builds the composite QB v5 request for the pods list.
|
||||
// Pod phase is derived separately via getPerGroupPodPhaseCounts (works for both
|
||||
// list and grouped_list modes), so no phase query is included here.
|
||||
func (m *module) newPodsTableListQuery() *qbtypes.QueryRangeRequest {
|
||||
queries := []qbtypes.QueryEnvelope{
|
||||
// Query A: CPU usage
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "A",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.cpu.usage",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query B: CPU request utilization
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "B",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.cpu_request_utilization",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationAvg,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query C: CPU limit utilization
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "C",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.cpu_limit_utilization",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationAvg,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query D: Memory working set
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "D",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.memory.working_set",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query E: Memory request utilization
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "E",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.memory_request_utilization",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationAvg,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query F: Memory limit utilization
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "F",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.pod.memory_limit_utilization",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationAvg,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{podUIDGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &qbtypes.QueryRangeRequest{
|
||||
RequestType: qbtypes.RequestTypeScalar,
|
||||
CompositeQuery: qbtypes.CompositeQuery{
|
||||
Queries: queries,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -10,8 +10,10 @@ import (
|
||||
|
||||
type Handler interface {
|
||||
ListHosts(http.ResponseWriter, *http.Request)
|
||||
ListPods(http.ResponseWriter, *http.Request)
|
||||
}
|
||||
|
||||
type Module interface {
|
||||
ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error)
|
||||
ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error)
|
||||
}
|
||||
|
||||
@@ -32,30 +32,28 @@ import (
|
||||
)
|
||||
|
||||
type PrepareTaskOptions struct {
|
||||
Rule *ruletypes.PostableRule
|
||||
TaskName string
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
Rule *ruletypes.PostableRule
|
||||
TaskName string
|
||||
RuleStore ruletypes.RuleStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
}
|
||||
|
||||
type PrepareTestRuleOptions struct {
|
||||
Rule *ruletypes.PostableRule
|
||||
RuleStore ruletypes.RuleStore
|
||||
MaintenanceStore ruletypes.MaintenanceStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
Rule *ruletypes.PostableRule
|
||||
RuleStore ruletypes.RuleStore
|
||||
Querier querier.Querier
|
||||
Logger *slog.Logger
|
||||
Cache cache.Cache
|
||||
ManagerOpts *ManagerOptions
|
||||
NotifyFunc NotifyFunc
|
||||
SQLStore sqlstore.SQLStore
|
||||
OrgID valuer.UUID
|
||||
}
|
||||
|
||||
const taskNameSuffix = "webAppEditor"
|
||||
@@ -134,7 +132,6 @@ func defaultOptions(o *ManagerOptions) *ManagerOptions {
|
||||
}
|
||||
|
||||
func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
|
||||
rules := make([]Rule, 0)
|
||||
var task Task
|
||||
|
||||
@@ -159,7 +156,6 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
WithMetadataStore(opts.ManagerOpts.MetadataStore),
|
||||
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return task, err
|
||||
}
|
||||
@@ -167,7 +163,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
rules = append(rules, tr)
|
||||
|
||||
// create ch rule task for evaluation
|
||||
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(TaskTypeCh, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else if opts.Rule.RuleType == ruletypes.RuleTypeProm {
|
||||
|
||||
@@ -183,7 +179,6 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
WithMetadataStore(opts.ManagerOpts.MetadataStore),
|
||||
WithRuleStateHistoryModule(opts.ManagerOpts.RuleStateHistoryModule),
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
return task, err
|
||||
}
|
||||
@@ -191,7 +186,7 @@ func defaultPrepareTaskFunc(opts PrepareTaskOptions) (Task, error) {
|
||||
rules = append(rules, pr)
|
||||
|
||||
// create promql rule task for evaluation
|
||||
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc, opts.MaintenanceStore, opts.OrgID)
|
||||
task = newTask(TaskTypeProm, opts.TaskName, taskNameSuffix, evaluation.GetFrequency().Duration(), rules, opts.ManagerOpts, opts.NotifyFunc)
|
||||
|
||||
} else {
|
||||
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "unsupported rule type %s. Supported types: %s, %s", opts.Rule.RuleType, ruletypes.RuleTypeProm, ruletypes.RuleTypeThreshold)
|
||||
@@ -241,6 +236,7 @@ func (m *Manager) MaintenanceStore() ruletypes.MaintenanceStore {
|
||||
return m.maintenanceStore
|
||||
}
|
||||
|
||||
// TODO: remove (unused)?
|
||||
func (m *Manager) Pause(b bool) {
|
||||
m.mtx.Lock()
|
||||
defer m.mtx.Unlock()
|
||||
@@ -430,19 +426,17 @@ func (m *Manager) editTask(_ context.Context, orgID valuer.UUID, rule *ruletypes
|
||||
m.logger.Debug("editing a rule task", "name", taskName)
|
||||
|
||||
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
m.logger.Error("loading tasks failed", errors.Attr(err))
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error preparing rule with given parameters, previous rule set restored")
|
||||
@@ -643,19 +637,17 @@ func (m *Manager) addTask(_ context.Context, orgID valuer.UUID, rule *ruletypes.
|
||||
|
||||
m.logger.Debug("adding a new rule task", "name", taskName)
|
||||
newTask, err := m.prepareTaskFunc(PrepareTaskOptions{
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: rule,
|
||||
TaskName: taskName,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.notifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
m.logger.Error("creating rule task failed", "name", taskName, errors.Attr(err))
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "error loading rules, previous rule set restored")
|
||||
@@ -703,7 +695,6 @@ func (m *Manager) RuleTasks() []Task {
|
||||
// RuleTasksWithoutLock returns the list of manager's rule tasks without
|
||||
// acquiring a lock on the manager.
|
||||
func (m *Manager) RuleTasksWithoutLock() []Task {
|
||||
|
||||
rgs := make([]Task, 0, len(m.tasks))
|
||||
for _, g := range m.tasks {
|
||||
rgs = append(rgs, g)
|
||||
@@ -897,7 +888,6 @@ func (m *Manager) GetRule(ctx context.Context, id valuer.UUID) (*ruletypes.Getta
|
||||
// the task state. For example - if a stored rule is disabled, then
|
||||
// there is no task running against it.
|
||||
func (m *Manager) syncRuleStateWithTask(ctx context.Context, orgID valuer.UUID, taskName string, rule *ruletypes.PostableRule) error {
|
||||
|
||||
if rule.Disabled {
|
||||
// check if rule has any task running
|
||||
if _, ok := m.tasks[taskName]; ok {
|
||||
@@ -1029,16 +1019,15 @@ func (m *Manager) TestNotification(ctx context.Context, orgID valuer.UUID, ruleS
|
||||
}
|
||||
|
||||
alertCount, err := m.prepareTestRuleFunc(PrepareTestRuleOptions{
|
||||
Rule: &parsedRule,
|
||||
RuleStore: m.ruleStore,
|
||||
MaintenanceStore: m.maintenanceStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.testNotifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
Rule: &parsedRule,
|
||||
RuleStore: m.ruleStore,
|
||||
Querier: m.opts.Querier,
|
||||
Logger: m.opts.Logger,
|
||||
Cache: m.cache,
|
||||
ManagerOpts: m.opts,
|
||||
NotifyFunc: m.testNotifyFunc,
|
||||
SQLStore: m.sqlstore,
|
||||
OrgID: orgID,
|
||||
})
|
||||
|
||||
return alertCount, err
|
||||
|
||||
@@ -15,7 +15,6 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// PromRuleTask is a promql rule executor
|
||||
@@ -39,14 +38,11 @@ type PromRuleTask struct {
|
||||
pause bool
|
||||
logger *slog.Logger
|
||||
notify NotifyFunc
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID valuer.UUID
|
||||
}
|
||||
|
||||
// NewPromRuleTask holds rules that have promql condition
|
||||
// and evaluates the rule at a given frequency
|
||||
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *PromRuleTask {
|
||||
func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *PromRuleTask {
|
||||
opts.Logger.Info("initiating a new rule group", "name", name, "frequency", frequency)
|
||||
|
||||
if frequency == 0 {
|
||||
@@ -63,10 +59,8 @@ func NewPromRuleTask(name, file string, frequency time.Duration, rules []Rule, o
|
||||
seriesInPreviousEval: make([]map[string]plabels.Labels, len(rules)),
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
maintenanceStore: maintenanceStore,
|
||||
logger: opts.Logger,
|
||||
orgID: orgID,
|
||||
notify: notify,
|
||||
logger: opts.Logger,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -330,30 +324,12 @@ func (g *PromRuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
}()
|
||||
|
||||
g.logger.InfoContext(ctx, "promql rule task", "name", g.name, "eval_started_at", ts)
|
||||
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
|
||||
if err != nil {
|
||||
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
shouldSkip := false
|
||||
for _, m := range maintenance {
|
||||
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
|
||||
if m.ShouldSkip(rule.ID(), ts) {
|
||||
shouldSkip = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if shouldSkip {
|
||||
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
|
||||
@@ -2,20 +2,18 @@ package rules
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
"runtime/debug"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"log/slog"
|
||||
|
||||
opentracing "github.com/opentracing/opentracing-go"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
"github.com/SigNoz/signoz/pkg/types/authtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ctxtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
// RuleTask holds a rule (with composite queries)
|
||||
@@ -37,34 +35,28 @@ type RuleTask struct {
|
||||
|
||||
pause bool
|
||||
notify NotifyFunc
|
||||
|
||||
maintenanceStore ruletypes.MaintenanceStore
|
||||
orgID valuer.UUID
|
||||
}
|
||||
|
||||
const DefaultFrequency = 1 * time.Minute
|
||||
|
||||
// NewRuleTask makes a new RuleTask with the given name, options, and rules.
|
||||
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) *RuleTask {
|
||||
|
||||
func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) *RuleTask {
|
||||
if frequency == 0 {
|
||||
frequency = DefaultFrequency
|
||||
}
|
||||
opts.Logger.Info("initiating a new rule task", "name", name, "frequency", frequency)
|
||||
|
||||
return &RuleTask{
|
||||
name: name,
|
||||
file: file,
|
||||
pause: false,
|
||||
frequency: frequency,
|
||||
rules: rules,
|
||||
opts: opts,
|
||||
logger: opts.Logger,
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
maintenanceStore: maintenanceStore,
|
||||
orgID: orgID,
|
||||
name: name,
|
||||
file: file,
|
||||
pause: false,
|
||||
frequency: frequency,
|
||||
rules: rules,
|
||||
opts: opts,
|
||||
logger: opts.Logger,
|
||||
done: make(chan struct{}),
|
||||
terminated: make(chan struct{}),
|
||||
notify: notify,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,6 +64,7 @@ func NewRuleTask(name, file string, frequency time.Duration, rules []Rule, opts
|
||||
func (g *RuleTask) Name() string { return g.name }
|
||||
|
||||
// Key returns the group key
|
||||
// TODO: remove (unused)?
|
||||
func (g *RuleTask) Key() string {
|
||||
return g.name + ";" + g.file
|
||||
}
|
||||
@@ -261,7 +254,6 @@ func nameAndLabels(rule Rule) string {
|
||||
// Rules are matched based on their name and labels. If there are duplicates, the
|
||||
// first is matched with the first, second with the second etc.
|
||||
func (g *RuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
from, ok := fromTask.(*RuleTask)
|
||||
if !ok {
|
||||
return errors.NewInternalf(errors.CodeInternal, "invalid from task for copy")
|
||||
@@ -306,7 +298,6 @@ func (g *RuleTask) CopyState(fromTask Task) error {
|
||||
|
||||
// Eval runs a single evaluation cycle in which all rules are evaluated sequentially.
|
||||
func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
|
||||
defer func() {
|
||||
if r := recover(); r != nil {
|
||||
g.logger.ErrorContext(
|
||||
@@ -318,31 +309,11 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
|
||||
g.logger.DebugContext(ctx, "rule task eval started", "name", g.name, "start_time", ts)
|
||||
|
||||
maintenance, err := g.maintenanceStore.ListPlannedMaintenance(ctx, g.orgID.StringValue())
|
||||
|
||||
if err != nil {
|
||||
g.logger.ErrorContext(ctx, "error in processing sql query", errors.Attr(err))
|
||||
}
|
||||
|
||||
for i, rule := range g.rules {
|
||||
if rule == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
shouldSkip := false
|
||||
for _, m := range maintenance {
|
||||
g.logger.InfoContext(ctx, "checking if rule should be skipped", slog.String("rule.id", rule.ID()), slog.Any("maintenance", m))
|
||||
if m.ShouldSkip(rule.ID(), ts) {
|
||||
shouldSkip = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if shouldSkip {
|
||||
g.logger.InfoContext(ctx, "rule should be skipped", slog.String("rule.id", rule.ID()))
|
||||
continue
|
||||
}
|
||||
|
||||
select {
|
||||
case <-g.done:
|
||||
return
|
||||
@@ -382,7 +353,6 @@ func (g *RuleTask) Eval(ctx context.Context, ts time.Time) {
|
||||
}
|
||||
|
||||
rule.SendAlerts(ctx, ts, g.opts.ResendDelay, g.frequency, g.notify)
|
||||
|
||||
}(i, rule)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,9 +3,6 @@ package rules
|
||||
import (
|
||||
"context"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
)
|
||||
|
||||
type TaskType string
|
||||
@@ -32,9 +29,9 @@ type Task interface {
|
||||
|
||||
// newTask returns an appropriate group for
|
||||
// rule type
|
||||
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc, maintenanceStore ruletypes.MaintenanceStore, orgID valuer.UUID) Task {
|
||||
func newTask(taskType TaskType, name, file string, frequency time.Duration, rules []Rule, opts *ManagerOptions, notify NotifyFunc) Task {
|
||||
if taskType == TaskTypeCh {
|
||||
return NewRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return NewRuleTask(name, file, frequency, rules, opts, notify)
|
||||
}
|
||||
return NewPromRuleTask(name, file, frequency, rules, opts, notify, maintenanceStore, orgID)
|
||||
return NewPromRuleTask(name, file, frequency, rules, opts, notify)
|
||||
}
|
||||
|
||||
@@ -19,6 +19,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
@@ -38,7 +39,8 @@ func TestNewHandlers(t *testing.T) {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
require.NoError(t, err)
|
||||
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
require.NoError(t, err)
|
||||
tokenizer := tokenizertest.NewMockTokenizer(t)
|
||||
emailing := emailingtest.New()
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/serviceaccount/implserviceaccount"
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sharder/noopsharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
@@ -39,7 +40,8 @@ func TestNewModules(t *testing.T) {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstore), sharder)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
require.NoError(t, err)
|
||||
alertmanager, err := signozalertmanager.New(context.TODO(), providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
alertmanager, err := signozalertmanager.New(providerSettings, alertmanager.Config{}, sqlstore, orgGetter, notificationManager, maintenanceStore)
|
||||
require.NoError(t, err)
|
||||
tokenizer := tokenizertest.NewMockTokenizer(t)
|
||||
emailing := emailingtest.New()
|
||||
|
||||
@@ -65,6 +65,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/tokenizer/tokenizerstore/sqltokenizerstore"
|
||||
"github.com/SigNoz/signoz/pkg/types/alertmanagertypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/featuretypes"
|
||||
ruletypes "github.com/SigNoz/signoz/pkg/types/ruletypes"
|
||||
"github.com/SigNoz/signoz/pkg/version"
|
||||
"github.com/SigNoz/signoz/pkg/web"
|
||||
"github.com/SigNoz/signoz/pkg/web/noopweb"
|
||||
@@ -221,9 +222,14 @@ func NewNotificationManagerProviderFactories(routeStore alertmanagertypes.RouteS
|
||||
)
|
||||
}
|
||||
|
||||
func NewAlertmanagerProviderFactories(sqlstore sqlstore.SQLStore, orgGetter organization.Getter, nfManager nfmanager.NotificationManager) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
|
||||
func NewAlertmanagerProviderFactories(
|
||||
sqlstore sqlstore.SQLStore,
|
||||
orgGetter organization.Getter,
|
||||
nfManager nfmanager.NotificationManager,
|
||||
maintenanceStore ruletypes.MaintenanceStore,
|
||||
) factory.NamedMap[factory.ProviderFactory[alertmanager.Alertmanager, alertmanager.Config]] {
|
||||
return factory.MustNewNamedMap(
|
||||
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager),
|
||||
signozalertmanager.NewFactory(sqlstore, orgGetter, nfManager, maintenanceStore),
|
||||
)
|
||||
}
|
||||
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/modules/user/impluser"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema"
|
||||
"github.com/SigNoz/signoz/pkg/sqlschema/sqlschematest"
|
||||
"github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore"
|
||||
"github.com/SigNoz/signoz/pkg/sqlstore/sqlstoretest"
|
||||
"github.com/SigNoz/signoz/pkg/statsreporter"
|
||||
@@ -59,9 +60,11 @@ func TestNewProviderFactories(t *testing.T) {
|
||||
})
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)), nil)
|
||||
store := sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual)
|
||||
orgGetter := implorganization.NewGetter(implorganization.NewStore(store), nil)
|
||||
notificationManager := nfmanagertest.NewMock()
|
||||
NewAlertmanagerProviderFactories(sqlstoretest.New(sqlstore.Config{Provider: "sqlite"}, sqlmock.QueryMatcherEqual), orgGetter, notificationManager)
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(store)
|
||||
NewAlertmanagerProviderFactories(store, orgGetter, notificationManager, maintenanceStore)
|
||||
})
|
||||
|
||||
assert.NotPanics(t, func() {
|
||||
|
||||
@@ -34,6 +34,7 @@ import (
|
||||
"github.com/SigNoz/signoz/pkg/querier"
|
||||
"github.com/SigNoz/signoz/pkg/queryparser"
|
||||
"github.com/SigNoz/signoz/pkg/ruler"
|
||||
sqlrulestore "github.com/SigNoz/signoz/pkg/ruler/rulestore/sqlrulestore"
|
||||
"github.com/SigNoz/signoz/pkg/sharder"
|
||||
"github.com/SigNoz/signoz/pkg/sqlmigration"
|
||||
"github.com/SigNoz/signoz/pkg/sqlmigrator"
|
||||
@@ -362,12 +363,14 @@ func New(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
maintenanceStore := sqlrulestore.NewMaintenanceStore(sqlstore)
|
||||
|
||||
// Initialize alertmanager from the available alertmanager provider factories
|
||||
alertmanager, err := factory.NewProviderFromNamedMap(
|
||||
ctx,
|
||||
providerSettings,
|
||||
config.Alertmanager,
|
||||
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager),
|
||||
NewAlertmanagerProviderFactories(sqlstore, orgGetter, nfManager, maintenanceStore),
|
||||
config.Alertmanager.Provider,
|
||||
)
|
||||
if err != nil {
|
||||
|
||||
@@ -124,6 +124,17 @@ func CountExpressionForSamplesTable(tableName string) string {
|
||||
return "sum(count)"
|
||||
}
|
||||
|
||||
// ValueColumnForSamplesTable returns the column name holding the sample value:
|
||||
// "last" for the 5m/30m aggregated tables, "value" otherwise.
|
||||
// note all the other columns in the aggregated samples tables are nothing but aggregations.
|
||||
// and so "last" is the value column for these tables.
|
||||
func ValueColumnForSamplesTable(tableName string) string {
|
||||
if tableName == SamplesV4Agg5mTableName || tableName == SamplesV4Agg30mTableName {
|
||||
return "last"
|
||||
}
|
||||
return "value"
|
||||
}
|
||||
|
||||
// start and end are in milliseconds
|
||||
// we have three tables for samples
|
||||
// 1. distributed_samples_v4
|
||||
|
||||
@@ -49,7 +49,7 @@ type HostFilter struct {
|
||||
FilterByStatus HostStatus `json:"filterByStatus"`
|
||||
}
|
||||
|
||||
// Validate ensures HostsListRequest contains acceptable values.
|
||||
// Validate ensures PostableHosts contains acceptable values.
|
||||
func (req *PostableHosts) Validate() error {
|
||||
if req == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
|
||||
|
||||
109
pkg/types/inframonitoringtypes/pods.go
Normal file
109
pkg/types/inframonitoringtypes/pods.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"slices"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
)
|
||||
|
||||
type Pods struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []PodRecord `json:"records" required:"true"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
type PodRecord struct {
|
||||
PodUID string `json:"podUID" required:"true"`
|
||||
PodCPU float64 `json:"podCPU" required:"true"`
|
||||
PodCPURequest float64 `json:"podCPURequest" required:"true"`
|
||||
PodCPULimit float64 `json:"podCPULimit" required:"true"`
|
||||
PodMemory float64 `json:"podMemory" required:"true"`
|
||||
PodMemoryRequest float64 `json:"podMemoryRequest" required:"true"`
|
||||
PodMemoryLimit float64 `json:"podMemoryLimit" required:"true"`
|
||||
PodPhase PodPhase `json:"podPhase" required:"true"`
|
||||
PendingPodCount int `json:"pendingPodCount" required:"true"`
|
||||
RunningPodCount int `json:"runningPodCount" required:"true"`
|
||||
SucceededPodCount int `json:"succeededPodCount" required:"true"`
|
||||
FailedPodCount int `json:"failedPodCount" required:"true"`
|
||||
UnknownPodCount int `json:"unknownPodCount" required:"true"`
|
||||
PodAge int64 `json:"podAge" required:"true"`
|
||||
Meta map[string]interface{} `json:"meta" required:"true"`
|
||||
}
|
||||
|
||||
// PostablePods is the request body for the v2 pods list API.
|
||||
type PostablePods struct {
|
||||
Start int64 `json:"start" required:"true"`
|
||||
End int64 `json:"end" required:"true"`
|
||||
Filter *qbtypes.Filter `json:"filter"`
|
||||
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
|
||||
OrderBy *qbtypes.OrderBy `json:"orderBy"`
|
||||
Offset int `json:"offset"`
|
||||
Limit int `json:"limit" required:"true"`
|
||||
}
|
||||
|
||||
// Validate ensures PostablePods contains acceptable values.
|
||||
func (req *PostablePods) Validate() error {
|
||||
if req == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
|
||||
}
|
||||
|
||||
if req.Start <= 0 {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid start time %d: start must be greater than 0",
|
||||
req.Start,
|
||||
)
|
||||
}
|
||||
|
||||
if req.End <= 0 {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid end time %d: end must be greater than 0",
|
||||
req.End,
|
||||
)
|
||||
}
|
||||
|
||||
if req.Start >= req.End {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid time range: start (%d) must be less than end (%d)",
|
||||
req.Start,
|
||||
req.End,
|
||||
)
|
||||
}
|
||||
|
||||
if req.Limit < 1 || req.Limit > 5000 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
|
||||
}
|
||||
|
||||
if req.Offset < 0 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
|
||||
}
|
||||
|
||||
if req.OrderBy != nil {
|
||||
if !slices.Contains(PodsValidOrderByKeys, req.OrderBy.Key.Name) {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", req.OrderBy.Key.Name)
|
||||
}
|
||||
if req.OrderBy.Direction != qbtypes.OrderDirectionAsc && req.OrderBy.Direction != qbtypes.OrderDirectionDesc {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", req.OrderBy.Direction)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON validates input immediately after decoding.
|
||||
func (req *PostablePods) UnmarshalJSON(data []byte) error {
|
||||
type raw PostablePods
|
||||
var decoded raw
|
||||
if err := json.Unmarshal(data, &decoded); err != nil {
|
||||
return err
|
||||
}
|
||||
*req = PostablePods(decoded)
|
||||
return req.Validate()
|
||||
}
|
||||
55
pkg/types/inframonitoringtypes/pods_constants.go
Normal file
55
pkg/types/inframonitoringtypes/pods_constants.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import "github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
type PodPhase struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
PodPhasePending = PodPhase{valuer.NewString("pending")}
|
||||
PodPhaseRunning = PodPhase{valuer.NewString("running")}
|
||||
PodPhaseSucceeded = PodPhase{valuer.NewString("succeeded")}
|
||||
PodPhaseFailed = PodPhase{valuer.NewString("failed")}
|
||||
PodPhaseUnknown = PodPhase{valuer.NewString("unknown")}
|
||||
PodPhaseNone = PodPhase{valuer.NewString("")}
|
||||
)
|
||||
|
||||
func (PodPhase) Enum() []any {
|
||||
return []any{
|
||||
PodPhasePending,
|
||||
PodPhaseRunning,
|
||||
PodPhaseSucceeded,
|
||||
PodPhaseFailed,
|
||||
PodPhaseUnknown,
|
||||
PodPhaseNone,
|
||||
}
|
||||
}
|
||||
|
||||
// Numeric pod phase values emitted by the k8s.pod.phase metric
|
||||
// (source: OTel kubeletstats receiver).
|
||||
const (
|
||||
PodPhaseNumPending = 1
|
||||
PodPhaseNumRunning = 2
|
||||
PodPhaseNumSucceeded = 3
|
||||
PodPhaseNumFailed = 4
|
||||
PodPhaseNumUnknown = 5
|
||||
)
|
||||
|
||||
const (
|
||||
PodsOrderByCPU = "cpu"
|
||||
PodsOrderByCPURequest = "cpu_request"
|
||||
PodsOrderByCPULimit = "cpu_limit"
|
||||
PodsOrderByMemory = "memory"
|
||||
PodsOrderByMemoryRequest = "memory_request"
|
||||
PodsOrderByMemoryLimit = "memory_limit"
|
||||
)
|
||||
|
||||
var PodsValidOrderByKeys = []string{
|
||||
PodsOrderByCPU,
|
||||
PodsOrderByCPURequest,
|
||||
PodsOrderByCPULimit,
|
||||
PodsOrderByMemory,
|
||||
PodsOrderByMemoryRequest,
|
||||
PodsOrderByMemoryLimit,
|
||||
}
|
||||
219
pkg/types/inframonitoringtypes/pods_test.go
Normal file
219
pkg/types/inframonitoringtypes/pods_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestPostablePods_Validate(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req *PostablePods
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid request",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nil request",
|
||||
req: nil,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time zero",
|
||||
req: &PostablePods{
|
||||
Start: 0,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time negative",
|
||||
req: &PostablePods{
|
||||
Start: -1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "end time zero",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 0,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time greater than end time",
|
||||
req: &PostablePods{
|
||||
Start: 2000,
|
||||
End: 1000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time equal to end time",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 1000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit zero",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 0,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit negative",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: -10,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit exceeds max",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 5001,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "offset negative",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: -5,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy nil is valid",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key cpu and direction asc",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: PodsOrderByCPU,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionAsc,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with phase key is rejected",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "phase",
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy with invalid key",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "unknown",
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key but invalid direction",
|
||||
req: &PostablePods{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: PodsOrderByMemory,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirection{String: valuer.NewString("invalid")},
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := tt.req.Validate()
|
||||
if tt.wantErr {
|
||||
require.Error(t, err)
|
||||
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -11,9 +11,7 @@ import (
|
||||
"github.com/uptrace/bun"
|
||||
)
|
||||
|
||||
var (
|
||||
ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
|
||||
)
|
||||
var ErrCodeInvalidPlannedMaintenancePayload = errors.MustNewCode("invalid_planned_maintenance_payload")
|
||||
|
||||
type MaintenanceStatus struct {
|
||||
valuer.String
|
||||
@@ -63,15 +61,15 @@ type StorablePlannedMaintenance struct {
|
||||
}
|
||||
|
||||
type PlannedMaintenance struct {
|
||||
ID valuer.UUID `json:"id" required:"true"`
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
RuleIDs []string `json:"alertIds"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
CreatedBy string `json:"createdBy"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
UpdatedBy string `json:"updatedBy"`
|
||||
ID valuer.UUID `json:"id" required:"true"`
|
||||
Name string `json:"name" required:"true"`
|
||||
Description string `json:"description"`
|
||||
Schedule *Schedule `json:"schedule" required:"true"`
|
||||
RuleIDs []string `json:"alertIds"`
|
||||
CreatedAt time.Time `json:"createdAt"`
|
||||
CreatedBy string `json:"createdBy"`
|
||||
UpdatedAt time.Time `json:"updatedAt"`
|
||||
UpdatedBy string `json:"updatedBy"`
|
||||
Status MaintenanceStatus `json:"status" required:"true"`
|
||||
Kind MaintenanceKind `json:"kind" required:"true"`
|
||||
}
|
||||
@@ -161,8 +159,11 @@ func (m *PlannedMaintenance) ShouldSkip(ruleID string, now time.Time) bool {
|
||||
|
||||
currentTime := now.In(loc)
|
||||
|
||||
// fixed schedule
|
||||
if !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
|
||||
// fixed schedule — only when no recurrence is configured.
|
||||
// When recurrence is set, the recurring check below handles everything;
|
||||
// falling through here would cause the window to match the absolute
|
||||
// StartTime–EndTime range instead of the daily/weekly/monthly pattern.
|
||||
if m.Schedule.Recurrence == nil && !m.Schedule.StartTime.IsZero() && !m.Schedule.EndTime.IsZero() {
|
||||
startTime := m.Schedule.StartTime.In(loc)
|
||||
endTime := m.Schedule.EndTime.In(loc)
|
||||
if currentTime.Equal(startTime) || currentTime.Equal(endTime) ||
|
||||
|
||||
@@ -13,7 +13,6 @@ func timePtr(t time.Time) *time.Time {
|
||||
}
|
||||
|
||||
func TestShouldSkipMaintenance(t *testing.T) {
|
||||
|
||||
cases := []struct {
|
||||
name string
|
||||
maintenance *PlannedMaintenance
|
||||
@@ -499,7 +498,7 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 1, 12, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 4, 1, 12, 10, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
{
|
||||
@@ -508,14 +507,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeWeekly,
|
||||
RepeatOn: []RepeatOn{RepeatOnMonday},
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 15, 12, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 4, 15, 12, 10, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
{
|
||||
@@ -524,14 +523,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeWeekly,
|
||||
RepeatOn: []RepeatOn{RepeatOnMonday},
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
|
||||
ts: time.Date(2024, 4, 14, 12, 10, 0, 0, time.UTC), // 14th 04 is sunday
|
||||
skip: false,
|
||||
},
|
||||
{
|
||||
@@ -540,14 +539,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeWeekly,
|
||||
RepeatOn: []RepeatOn{RepeatOnMonday},
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
|
||||
ts: time.Date(2024, 4, 16, 12, 10, 0, 0, time.UTC), // 16th 04 is tuesday
|
||||
skip: false,
|
||||
},
|
||||
{
|
||||
@@ -556,14 +555,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeWeekly,
|
||||
RepeatOn: []RepeatOn{RepeatOnMonday},
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 05, 06, 12, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 5, 6, 12, 10, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
{
|
||||
@@ -572,14 +571,14 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 01, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 1, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeWeekly,
|
||||
RepeatOn: []RepeatOn{RepeatOnMonday},
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 05, 06, 14, 00, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 5, 6, 14, 0, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
{
|
||||
@@ -588,13 +587,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeMonthly,
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 04, 12, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 4, 4, 12, 10, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
{
|
||||
@@ -603,13 +602,13 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeMonthly,
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 04, 04, 14, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 4, 4, 14, 10, 0, 0, time.UTC),
|
||||
skip: false,
|
||||
},
|
||||
{
|
||||
@@ -618,13 +617,52 @@ func TestShouldSkipMaintenance(t *testing.T) {
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2024, 04, 04, 12, 0, 0, 0, time.UTC),
|
||||
StartTime: time.Date(2024, 4, 4, 12, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeMonthly,
|
||||
},
|
||||
},
|
||||
},
|
||||
ts: time.Date(2024, 05, 04, 12, 10, 0, 0, time.UTC),
|
||||
ts: time.Date(2024, 5, 4, 12, 10, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
// The recurrence should govern, when set. Not the fixed range.
|
||||
{
|
||||
name: "recurring-daily-with-fixed-times-outside-daily-window",
|
||||
maintenance: &PlannedMaintenance{
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
// These fixed fields should be ignored when Recurrence is set.
|
||||
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
|
||||
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC), // daily at 14:00
|
||||
Duration: valuer.MustParseTextDuration("2h"), // until 16:00
|
||||
RepeatType: RepeatTypeDaily,
|
||||
},
|
||||
},
|
||||
},
|
||||
// 11:00 is inside the fixed range but outside the daily 14:00-16:00 window.
|
||||
// Before the fix this returned true (bug); after fix it returns false.
|
||||
ts: time.Date(2026, 4, 15, 11, 0, 0, 0, time.UTC),
|
||||
skip: false,
|
||||
},
|
||||
{
|
||||
name: "recurring-daily-with-fixed-times-inside-daily-window",
|
||||
maintenance: &PlannedMaintenance{
|
||||
Schedule: &Schedule{
|
||||
Timezone: "UTC",
|
||||
StartTime: time.Date(2026, 4, 1, 10, 0, 0, 0, time.UTC),
|
||||
EndTime: time.Date(2026, 4, 30, 18, 0, 0, 0, time.UTC),
|
||||
Recurrence: &Recurrence{
|
||||
StartTime: time.Date(2026, 4, 1, 14, 0, 0, 0, time.UTC),
|
||||
Duration: valuer.MustParseTextDuration("2h"),
|
||||
RepeatType: RepeatTypeDaily,
|
||||
},
|
||||
},
|
||||
},
|
||||
// 15:00 is inside the daily 14:00-16:00 window — should skip.
|
||||
ts: time.Date(2026, 4, 15, 15, 0, 0, 0, time.UTC),
|
||||
skip: true,
|
||||
},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user