mirror of
https://github.com/SigNoz/signoz.git
synced 2026-05-08 11:30:32 +01:00
Compare commits
10 Commits
tvats-pkg-
...
ns/scope
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
35da39247c | ||
|
|
fefef70d84 | ||
|
|
9e94ee30b9 | ||
|
|
ceccc47a34 | ||
|
|
23da5e22ec | ||
|
|
4c1b479149 | ||
|
|
f72204a8b2 | ||
|
|
deb3f385fa | ||
|
|
77ce5f86b1 | ||
|
|
ff211de441 |
@@ -2547,7 +2547,8 @@ components:
|
||||
format: double
|
||||
type: number
|
||||
meta:
|
||||
additionalProperties: {}
|
||||
additionalProperties:
|
||||
type: string
|
||||
nullable: true
|
||||
type: object
|
||||
status:
|
||||
@@ -2597,6 +2598,103 @@ components:
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesNodeCondition:
|
||||
enum:
|
||||
- ready
|
||||
- not_ready
|
||||
- no_data
|
||||
type: string
|
||||
InframonitoringtypesNodeCountsByReadiness:
|
||||
properties:
|
||||
notReady:
|
||||
type: integer
|
||||
ready:
|
||||
type: integer
|
||||
required:
|
||||
- ready
|
||||
- notReady
|
||||
type: object
|
||||
InframonitoringtypesNodeRecord:
|
||||
properties:
|
||||
condition:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNodeCondition'
|
||||
meta:
|
||||
additionalProperties:
|
||||
type: string
|
||||
nullable: true
|
||||
type: object
|
||||
nodeCPU:
|
||||
format: double
|
||||
type: number
|
||||
nodeCPUAllocatable:
|
||||
format: double
|
||||
type: number
|
||||
nodeCountsByReadiness:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNodeCountsByReadiness'
|
||||
nodeMemory:
|
||||
format: double
|
||||
type: number
|
||||
nodeMemoryAllocatable:
|
||||
format: double
|
||||
type: number
|
||||
nodeName:
|
||||
type: string
|
||||
podCountsByPhase:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodCountsByPhase'
|
||||
required:
|
||||
- nodeName
|
||||
- condition
|
||||
- nodeCountsByReadiness
|
||||
- podCountsByPhase
|
||||
- nodeCPU
|
||||
- nodeCPUAllocatable
|
||||
- nodeMemory
|
||||
- nodeMemoryAllocatable
|
||||
- meta
|
||||
type: object
|
||||
InframonitoringtypesNodes:
|
||||
properties:
|
||||
endTimeBeforeRetention:
|
||||
type: boolean
|
||||
records:
|
||||
items:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNodeRecord'
|
||||
nullable: true
|
||||
type: array
|
||||
requiredMetricsCheck:
|
||||
$ref: '#/components/schemas/InframonitoringtypesRequiredMetricsCheck'
|
||||
total:
|
||||
type: integer
|
||||
type:
|
||||
$ref: '#/components/schemas/InframonitoringtypesResponseType'
|
||||
warning:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5QueryWarnData'
|
||||
required:
|
||||
- type
|
||||
- records
|
||||
- total
|
||||
- requiredMetricsCheck
|
||||
- endTimeBeforeRetention
|
||||
type: object
|
||||
InframonitoringtypesPodCountsByPhase:
|
||||
properties:
|
||||
failed:
|
||||
type: integer
|
||||
pending:
|
||||
type: integer
|
||||
running:
|
||||
type: integer
|
||||
succeeded:
|
||||
type: integer
|
||||
unknown:
|
||||
type: integer
|
||||
required:
|
||||
- pending
|
||||
- running
|
||||
- succeeded
|
||||
- failed
|
||||
- unknown
|
||||
type: object
|
||||
InframonitoringtypesPodPhase:
|
||||
enum:
|
||||
- pending
|
||||
@@ -2604,18 +2702,15 @@ components:
|
||||
- succeeded
|
||||
- failed
|
||||
- unknown
|
||||
- ""
|
||||
- no_data
|
||||
type: string
|
||||
InframonitoringtypesPodRecord:
|
||||
properties:
|
||||
failedPodCount:
|
||||
type: integer
|
||||
meta:
|
||||
additionalProperties: {}
|
||||
additionalProperties:
|
||||
type: string
|
||||
nullable: true
|
||||
type: object
|
||||
pendingPodCount:
|
||||
type: integer
|
||||
podAge:
|
||||
format: int64
|
||||
type: integer
|
||||
@@ -2628,6 +2723,8 @@ components:
|
||||
podCPURequest:
|
||||
format: double
|
||||
type: number
|
||||
podCountsByPhase:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodCountsByPhase'
|
||||
podMemory:
|
||||
format: double
|
||||
type: number
|
||||
@@ -2641,12 +2738,6 @@ components:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPodPhase'
|
||||
podUID:
|
||||
type: string
|
||||
runningPodCount:
|
||||
type: integer
|
||||
succeededPodCount:
|
||||
type: integer
|
||||
unknownPodCount:
|
||||
type: integer
|
||||
required:
|
||||
- podUID
|
||||
- podCPU
|
||||
@@ -2656,11 +2747,7 @@ components:
|
||||
- podMemoryRequest
|
||||
- podMemoryLimit
|
||||
- podPhase
|
||||
- pendingPodCount
|
||||
- runningPodCount
|
||||
- succeededPodCount
|
||||
- failedPodCount
|
||||
- unknownPodCount
|
||||
- podCountsByPhase
|
||||
- podAge
|
||||
- meta
|
||||
type: object
|
||||
@@ -2714,6 +2801,32 @@ components:
|
||||
- end
|
||||
- limit
|
||||
type: object
|
||||
InframonitoringtypesPostableNodes:
|
||||
properties:
|
||||
end:
|
||||
format: int64
|
||||
type: integer
|
||||
filter:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5Filter'
|
||||
groupBy:
|
||||
items:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
|
||||
nullable: true
|
||||
type: array
|
||||
limit:
|
||||
type: integer
|
||||
offset:
|
||||
type: integer
|
||||
orderBy:
|
||||
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
|
||||
start:
|
||||
format: int64
|
||||
type: integer
|
||||
required:
|
||||
- start
|
||||
- end
|
||||
- limit
|
||||
type: object
|
||||
InframonitoringtypesPostablePods:
|
||||
properties:
|
||||
end:
|
||||
@@ -11618,12 +11731,83 @@ paths:
|
||||
summary: List Hosts for Infra Monitoring
|
||||
tags:
|
||||
- inframonitoring
|
||||
/api/v2/infra_monitoring/nodes:
|
||||
post:
|
||||
deprecated: false
|
||||
description: 'Returns a paginated list of Kubernetes nodes with key metrics:
|
||||
CPU usage, CPU allocatable, memory working set, memory allocatable, per-group
|
||||
nodeCountsByReadiness ({ ready, notReady } from each node''s latest k8s.node.condition_ready
|
||||
in the window) and per-group podCountsByPhase ({ pending, running, succeeded,
|
||||
failed, unknown } for pods scheduled on the listed nodes). Each node includes
|
||||
metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is
|
||||
''list'' for the default k8s.node.name grouping (each row is one node with
|
||||
its current condition string: ready / not_ready / no_data) or ''grouped_list''
|
||||
for custom groupBy keys (each row aggregates nodes in the group; condition
|
||||
stays no_data). Supports filtering via a filter expression, custom groupBy,
|
||||
ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination
|
||||
via offset/limit. Also reports missing required metrics and whether the requested
|
||||
time range falls before the data retention boundary. Numeric metric fields
|
||||
(nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1
|
||||
as a sentinel when no data is available for that field.'
|
||||
operationId: ListNodes
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/InframonitoringtypesPostableNodes'
|
||||
responses:
|
||||
"200":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
properties:
|
||||
data:
|
||||
$ref: '#/components/schemas/InframonitoringtypesNodes'
|
||||
status:
|
||||
type: string
|
||||
required:
|
||||
- status
|
||||
- data
|
||||
type: object
|
||||
description: OK
|
||||
"400":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Bad Request
|
||||
"401":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Unauthorized
|
||||
"403":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Forbidden
|
||||
"500":
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/RenderErrorResponse'
|
||||
description: Internal Server Error
|
||||
security:
|
||||
- api_key:
|
||||
- VIEWER
|
||||
- tokenizer:
|
||||
- VIEWER
|
||||
summary: List Nodes for Infra Monitoring
|
||||
tags:
|
||||
- inframonitoring
|
||||
/api/v2/infra_monitoring/pods:
|
||||
post:
|
||||
deprecated: false
|
||||
description: 'Returns a paginated list of Kubernetes pods with key metrics:
|
||||
CPU usage, CPU request/limit utilization, memory working set, memory request/limit
|
||||
utilization, current pod phase (pending/running/succeeded/failed/unknown),
|
||||
utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data),
|
||||
and pod age (ms since start time). Each pod includes metadata attributes (namespace,
|
||||
node, workload owner such as deployment/statefulset/daemonset/job/cronjob,
|
||||
cluster). Supports filtering via a filter expression, custom groupBy to aggregate
|
||||
@@ -11631,13 +11815,13 @@ paths:
|
||||
cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit.
|
||||
The response type is ''list'' for the default k8s.pod.uid grouping (each row
|
||||
is one pod with its current phase) or ''grouped_list'' for custom groupBy
|
||||
keys (each row aggregates pods in the group with per-phase counts: pendingPodCount,
|
||||
runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived
|
||||
from each pod''s latest phase in the window). Also reports missing required
|
||||
metrics and whether the requested time range falls before the data retention
|
||||
boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory,
|
||||
podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no
|
||||
data is available for that field.'
|
||||
keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase:
|
||||
{ pending, running, succeeded, failed, unknown } derived from each pod''s
|
||||
latest phase in the window). Also reports missing required metrics and whether
|
||||
the requested time range falls before the data retention boundary. Numeric
|
||||
metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest,
|
||||
podMemoryLimit, podAge) return -1 as a sentinel when no data is available
|
||||
for that field.'
|
||||
operationId: ListPods
|
||||
requestBody:
|
||||
content:
|
||||
|
||||
@@ -239,10 +239,12 @@
|
||||
},
|
||||
"lint-staged": {
|
||||
"*.(js|jsx|ts|tsx)": [
|
||||
"oxlint --fix",
|
||||
"oxfmt --write",
|
||||
"sh -c tsgo --noEmit"
|
||||
],
|
||||
"*.(js|jsx|ts|tsx|scss|css)": [
|
||||
"oxlint --fix --quiet --no-error-on-unmatched-pattern",
|
||||
"oxfmt --write"
|
||||
],
|
||||
"*.(scss|css)": [
|
||||
"stylelint"
|
||||
]
|
||||
|
||||
@@ -13,8 +13,10 @@ import type {
|
||||
|
||||
import type {
|
||||
InframonitoringtypesPostableHostsDTO,
|
||||
InframonitoringtypesPostableNodesDTO,
|
||||
InframonitoringtypesPostablePodsDTO,
|
||||
ListHosts200,
|
||||
ListNodes200,
|
||||
ListPods200,
|
||||
RenderErrorResponseDTO,
|
||||
} from '../sigNoz.schemas';
|
||||
@@ -107,7 +109,91 @@ export const useListHosts = <
|
||||
return useMutation(mutationOptions);
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts: pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
|
||||
* Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Nodes for Infra Monitoring
|
||||
*/
|
||||
export const listNodes = (
|
||||
inframonitoringtypesPostableNodesDTO: BodyType<InframonitoringtypesPostableNodesDTO>,
|
||||
signal?: AbortSignal,
|
||||
) => {
|
||||
return GeneratedAPIInstance<ListNodes200>({
|
||||
url: `/api/v2/infra_monitoring/nodes`,
|
||||
method: 'POST',
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
data: inframonitoringtypesPostableNodesDTO,
|
||||
signal,
|
||||
});
|
||||
};
|
||||
|
||||
export const getListNodesMutationOptions = <
|
||||
TError = ErrorType<RenderErrorResponseDTO>,
|
||||
TContext = unknown,
|
||||
>(options?: {
|
||||
mutation?: UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listNodes>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
|
||||
TContext
|
||||
>;
|
||||
}): UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listNodes>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
|
||||
TContext
|
||||
> => {
|
||||
const mutationKey = ['listNodes'];
|
||||
const { mutation: mutationOptions } = options
|
||||
? options.mutation &&
|
||||
'mutationKey' in options.mutation &&
|
||||
options.mutation.mutationKey
|
||||
? options
|
||||
: { ...options, mutation: { ...options.mutation, mutationKey } }
|
||||
: { mutation: { mutationKey } };
|
||||
|
||||
const mutationFn: MutationFunction<
|
||||
Awaited<ReturnType<typeof listNodes>>,
|
||||
{ data: BodyType<InframonitoringtypesPostableNodesDTO> }
|
||||
> = (props) => {
|
||||
const { data } = props ?? {};
|
||||
|
||||
return listNodes(data);
|
||||
};
|
||||
|
||||
return { mutationFn, ...mutationOptions };
|
||||
};
|
||||
|
||||
export type ListNodesMutationResult = NonNullable<
|
||||
Awaited<ReturnType<typeof listNodes>>
|
||||
>;
|
||||
export type ListNodesMutationBody =
|
||||
BodyType<InframonitoringtypesPostableNodesDTO>;
|
||||
export type ListNodesMutationError = ErrorType<RenderErrorResponseDTO>;
|
||||
|
||||
/**
|
||||
* @summary List Nodes for Infra Monitoring
|
||||
*/
|
||||
export const useListNodes = <
|
||||
TError = ErrorType<RenderErrorResponseDTO>,
|
||||
TContext = unknown,
|
||||
>(options?: {
|
||||
mutation?: UseMutationOptions<
|
||||
Awaited<ReturnType<typeof listNodes>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
|
||||
TContext
|
||||
>;
|
||||
}): UseMutationResult<
|
||||
Awaited<ReturnType<typeof listNodes>>,
|
||||
TError,
|
||||
{ data: BodyType<InframonitoringtypesPostableNodesDTO> },
|
||||
TContext
|
||||
> => {
|
||||
const mutationOptions = getListNodesMutationOptions(options);
|
||||
|
||||
return useMutation(mutationOptions);
|
||||
};
|
||||
/**
|
||||
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
|
||||
* @summary List Pods for Infra Monitoring
|
||||
*/
|
||||
export const listPods = (
|
||||
|
||||
@@ -4579,7 +4579,7 @@ export interface InframonitoringtypesHostFilterDTO {
|
||||
* @nullable
|
||||
*/
|
||||
export type InframonitoringtypesHostRecordDTOMeta = {
|
||||
[key: string]: unknown;
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
|
||||
export interface InframonitoringtypesHostRecordDTO {
|
||||
@@ -4652,35 +4652,127 @@ export interface InframonitoringtypesHostsDTO {
|
||||
warning?: Querybuildertypesv5QueryWarnDataDTO;
|
||||
}
|
||||
|
||||
export enum InframonitoringtypesNodeConditionDTO {
|
||||
ready = 'ready',
|
||||
not_ready = 'not_ready',
|
||||
no_data = 'no_data',
|
||||
}
|
||||
export interface InframonitoringtypesNodeCountsByReadinessDTO {
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
notReady: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
ready: number;
|
||||
}
|
||||
|
||||
/**
|
||||
* @nullable
|
||||
*/
|
||||
export type InframonitoringtypesNodeRecordDTOMeta = {
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
|
||||
export interface InframonitoringtypesNodeRecordDTO {
|
||||
condition: InframonitoringtypesNodeConditionDTO;
|
||||
/**
|
||||
* @type object
|
||||
* @nullable true
|
||||
*/
|
||||
meta: InframonitoringtypesNodeRecordDTOMeta;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
nodeCPU: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
nodeCPUAllocatable: number;
|
||||
nodeCountsByReadiness: InframonitoringtypesNodeCountsByReadinessDTO;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
nodeMemory: number;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
*/
|
||||
nodeMemoryAllocatable: number;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
nodeName: string;
|
||||
podCountsByPhase: InframonitoringtypesPodCountsByPhaseDTO;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesNodesDTO {
|
||||
/**
|
||||
* @type boolean
|
||||
*/
|
||||
endTimeBeforeRetention: boolean;
|
||||
/**
|
||||
* @type array
|
||||
* @nullable true
|
||||
*/
|
||||
records: InframonitoringtypesNodeRecordDTO[] | null;
|
||||
requiredMetricsCheck: InframonitoringtypesRequiredMetricsCheckDTO;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
total: number;
|
||||
type: InframonitoringtypesResponseTypeDTO;
|
||||
warning?: Querybuildertypesv5QueryWarnDataDTO;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPodCountsByPhaseDTO {
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
failed: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
pending: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
running: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
succeeded: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
unknown: number;
|
||||
}
|
||||
|
||||
export enum InframonitoringtypesPodPhaseDTO {
|
||||
pending = 'pending',
|
||||
running = 'running',
|
||||
succeeded = 'succeeded',
|
||||
failed = 'failed',
|
||||
unknown = 'unknown',
|
||||
'' = '',
|
||||
no_data = 'no_data',
|
||||
}
|
||||
/**
|
||||
* @nullable
|
||||
*/
|
||||
export type InframonitoringtypesPodRecordDTOMeta = {
|
||||
[key: string]: unknown;
|
||||
[key: string]: string;
|
||||
} | null;
|
||||
|
||||
export interface InframonitoringtypesPodRecordDTO {
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
failedPodCount: number;
|
||||
/**
|
||||
* @type object
|
||||
* @nullable true
|
||||
*/
|
||||
meta: InframonitoringtypesPodRecordDTOMeta;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
pendingPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
@@ -4701,6 +4793,7 @@ export interface InframonitoringtypesPodRecordDTO {
|
||||
* @format double
|
||||
*/
|
||||
podCPURequest: number;
|
||||
podCountsByPhase: InframonitoringtypesPodCountsByPhaseDTO;
|
||||
/**
|
||||
* @type number
|
||||
* @format double
|
||||
@@ -4721,18 +4814,6 @@ export interface InframonitoringtypesPodRecordDTO {
|
||||
* @type string
|
||||
*/
|
||||
podUID: string;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
runningPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
succeededPodCount: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
unknownPodCount: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPodsDTO {
|
||||
@@ -4782,6 +4863,34 @@ export interface InframonitoringtypesPostableHostsDTO {
|
||||
start: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPostableNodesDTO {
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
*/
|
||||
end: number;
|
||||
filter?: Querybuildertypesv5FilterDTO;
|
||||
/**
|
||||
* @type array
|
||||
* @nullable true
|
||||
*/
|
||||
groupBy?: Querybuildertypesv5GroupByKeyDTO[] | null;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
limit: number;
|
||||
/**
|
||||
* @type integer
|
||||
*/
|
||||
offset?: number;
|
||||
orderBy?: Querybuildertypesv5OrderByDTO;
|
||||
/**
|
||||
* @type integer
|
||||
* @format int64
|
||||
*/
|
||||
start: number;
|
||||
}
|
||||
|
||||
export interface InframonitoringtypesPostablePodsDTO {
|
||||
/**
|
||||
* @type integer
|
||||
@@ -9133,6 +9242,14 @@ export type ListHosts200 = {
|
||||
status: string;
|
||||
};
|
||||
|
||||
export type ListNodes200 = {
|
||||
data: InframonitoringtypesNodesDTO;
|
||||
/**
|
||||
* @type string
|
||||
*/
|
||||
status: string;
|
||||
};
|
||||
|
||||
export type ListPods200 = {
|
||||
data: InframonitoringtypesPodsDTO;
|
||||
/**
|
||||
|
||||
@@ -35,7 +35,7 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
ID: "ListPods",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Pods for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts: pendingPodCount, runningPodCount, succeededPodCount, failedPodCount, unknownPodCount derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
|
||||
Description: "Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostablePods),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Pods),
|
||||
@@ -48,5 +48,24 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := router.Handle("/api/v2/infra_monitoring/nodes", handler.New(
|
||||
provider.authZ.ViewAccess(provider.infraMonitoringHandler.ListNodes),
|
||||
handler.OpenAPIDef{
|
||||
ID: "ListNodes",
|
||||
Tags: []string{"inframonitoring"},
|
||||
Summary: "List Nodes for Infra Monitoring",
|
||||
Description: "Returns a paginated list of Kubernetes nodes with key metrics: CPU usage, CPU allocatable, memory working set, memory allocatable, per-group nodeCountsByReadiness ({ ready, notReady } from each node's latest k8s.node.condition_ready in the window) and per-group podCountsByPhase ({ pending, running, succeeded, failed, unknown } for pods scheduled on the listed nodes). Each node includes metadata attributes (k8s.node.uid, k8s.cluster.name). The response type is 'list' for the default k8s.node.name grouping (each row is one node with its current condition string: ready / not_ready / no_data) or 'grouped_list' for custom groupBy keys (each row aggregates nodes in the group; condition stays no_data). Supports filtering via a filter expression, custom groupBy, ordering by cpu / cpu_allocatable / memory / memory_allocatable, and pagination via offset/limit. Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (nodeCPU, nodeCPUAllocatable, nodeMemory, nodeMemoryAllocatable) return -1 as a sentinel when no data is available for that field.",
|
||||
Request: new(inframonitoringtypes.PostableNodes),
|
||||
RequestContentType: "application/json",
|
||||
Response: new(inframonitoringtypes.Nodes),
|
||||
ResponseContentType: "application/json",
|
||||
SuccessStatusCode: http.StatusOK,
|
||||
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
|
||||
Deprecated: false,
|
||||
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
|
||||
})).Methods(http.MethodPost).GetError(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
package v2
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Code is a dotted, hierarchical identifier registered at process start. It
|
||||
// encodes domain (subsystem), op (verb), optional sub (qualifier), and a
|
||||
// terminal reason. Codes are values; two Codes with the same string are equal
|
||||
// by value and safe to compare with ==.
|
||||
type Code struct{ s string }
|
||||
|
||||
// String returns the dotted code as it appears on the wire. Empty for the
|
||||
// zero value.
|
||||
func (c Code) String() string { return c.s }
|
||||
|
||||
// codePattern allows 2-4 dotted segments, each starting with a lowercase
|
||||
// letter and continuing with [a-z0-9_]. One segment is too broad (use a
|
||||
// domain prefix); five or more means the domain should be split.
|
||||
var codePattern = regexp.MustCompile(`^[a-z][a-z0-9_]*(\.[a-z][a-z0-9_]*){1,3}$`)
|
||||
|
||||
// Meta is the per-code default envelope applied by constructors before
|
||||
// per-call options. Every field has a natural per-code default — an auth
|
||||
// code always wants Reauthenticate, every documented code wants its docs
|
||||
// URL — so the registry is the right place to declare them once.
|
||||
type Meta struct {
|
||||
Category Category
|
||||
Fault Fault
|
||||
Retry Retry
|
||||
Remediation Remediation
|
||||
Refs map[RefKind]string
|
||||
}
|
||||
|
||||
// Retry tells the caller how and when to retry. After is meaningful only
|
||||
// when Policy == RetryAfter.
|
||||
type Retry struct {
|
||||
Policy RetryPolicy
|
||||
After time.Duration
|
||||
}
|
||||
|
||||
var (
|
||||
registryMu sync.RWMutex
|
||||
registry = map[string]Meta{}
|
||||
)
|
||||
|
||||
// Register installs a code with its default Meta and returns the Code value.
|
||||
// It panics on a malformed code string or a duplicate registration — both
|
||||
// indicate a programming error that must be caught at boot, not at first
|
||||
// failure.
|
||||
//
|
||||
// Call from the owning domain's package init or top-level var block:
|
||||
//
|
||||
// var CodeUnknownFunction = errors.Register("query.parse.unknown_function", errors.Meta{
|
||||
// Category: errors.CategoryInvalidInput,
|
||||
// Fault: errors.FaultCaller,
|
||||
// Retry: errors.Retry{Policy: errors.RetryAfterFix},
|
||||
// })
|
||||
func Register(s string, meta Meta) Code {
|
||||
if !codePattern.MatchString(s) {
|
||||
panic("errors/v2: malformed code: " + s)
|
||||
}
|
||||
registryMu.Lock()
|
||||
defer registryMu.Unlock()
|
||||
if _, ok := registry[s]; ok {
|
||||
panic("errors/v2: duplicate code: " + s)
|
||||
}
|
||||
registry[s] = meta
|
||||
return Code{s: s}
|
||||
}
|
||||
|
||||
// MetaOf returns the Meta a code was registered with. Returns the zero Meta
|
||||
// and false for unregistered or zero codes.
|
||||
func MetaOf(c Code) (Meta, bool) {
|
||||
if c.s == "" {
|
||||
return Meta{}, false
|
||||
}
|
||||
registryMu.RLock()
|
||||
defer registryMu.RUnlock()
|
||||
m, ok := registry[c.s]
|
||||
return m, ok
|
||||
}
|
||||
|
||||
// registerOrGet is the internal idempotent register used by adapters that
|
||||
// may see the same code (e.g. legacy.<v1>) more than once across the process
|
||||
// lifetime. It panics on malformed codes — duplicate codes silently keep the
|
||||
// existing Meta.
|
||||
func registerOrGet(s string, meta Meta) Code {
|
||||
if !codePattern.MatchString(s) {
|
||||
panic("errors/v2: malformed code: " + s)
|
||||
}
|
||||
registryMu.Lock()
|
||||
defer registryMu.Unlock()
|
||||
if _, ok := registry[s]; !ok {
|
||||
registry[s] = meta
|
||||
}
|
||||
return Code{s: s}
|
||||
}
|
||||
@@ -1,93 +0,0 @@
|
||||
package v2
|
||||
|
||||
// The enums in this file are closed sets. Each value is a package-level var of
|
||||
// an unexported-field struct, so external code cannot synthesize new values —
|
||||
// it must reference one of the defined ones. String() returns the stable
|
||||
// snake_case wire name; once shipped, those names are append-only.
|
||||
|
||||
// Category groups errors by what kind of failure occurred. It is the coarsest
|
||||
// branch-worthy axis and is intended to be a superset of gRPC status codes
|
||||
// extended with cases SigNoz cares about (e.g. license issues land under
|
||||
// FailedDependency or ResourceExhausted depending on context).
|
||||
type Category struct{ s string }
|
||||
|
||||
func (c Category) String() string { return c.s }
|
||||
|
||||
var (
|
||||
CategoryInvalidInput = Category{"invalid_input"} // request was malformed or violated a documented constraint.
|
||||
CategoryNotFound = Category{"not_found"} // referenced resource does not exist.
|
||||
CategoryAlreadyExists = Category{"already_exists"} // resource creation conflicts with an existing one.
|
||||
CategoryConflict = Category{"conflict"} // concurrent modification or state mismatch (e.g. stale revision).
|
||||
CategoryPrecondition = Category{"precondition"} // a required precondition (system or caller-asserted) was not met.
|
||||
CategoryUnauthenticated = Category{"unauthenticated"} // credentials are missing or invalid.
|
||||
CategoryForbidden = Category{"forbidden"} // authenticated but not authorized for this action.
|
||||
CategoryResourceExhausted = Category{"resource_exhausted"} // quota, rate limit, or other budget exceeded.
|
||||
CategoryFailedDependency = Category{"failed_dependency"} // an upstream service we depend on failed (db, license, etc.).
|
||||
CategoryUnavailable = Category{"unavailable"} // service is temporarily down; retry with backoff.
|
||||
CategoryTimeout = Category{"timeout"} // deadline exceeded before the operation completed.
|
||||
CategoryCanceled = Category{"canceled"} // caller or context canceled the operation.
|
||||
CategoryUnimplemented = Category{"unimplemented"} // operation is not supported (or not yet) by this server.
|
||||
CategoryDataLoss = Category{"data_loss"} // unrecoverable data corruption or loss detected.
|
||||
CategoryInternal = Category{"internal"} // bug — invariant broken; should not occur in normal operation.
|
||||
)
|
||||
|
||||
// Fault attributes responsibility. An agent uses this to decide whether to
|
||||
// fix the request (Caller), retry/escalate (Server, Upstream), or page a
|
||||
// human (Operator).
|
||||
type Fault struct{ s string }
|
||||
|
||||
func (f Fault) String() string { return f.s }
|
||||
|
||||
var (
|
||||
FaultCaller = Fault{"caller"}
|
||||
FaultServer = Fault{"server"}
|
||||
FaultUpstream = Fault{"upstream"}
|
||||
FaultOperator = Fault{"operator"}
|
||||
)
|
||||
|
||||
// RetryPolicy tells the caller how to behave on retry. Backoff implies the
|
||||
// caller should use its own backoff schedule; After means honor Retry.After
|
||||
// exactly; AfterFix and AfterAuth signal that retry is pointless until the
|
||||
// caller fixes the request or re-authenticates.
|
||||
type RetryPolicy struct{ s string }
|
||||
|
||||
func (r RetryPolicy) String() string { return r.s }
|
||||
|
||||
var (
|
||||
RetryNever = RetryPolicy{"never"}
|
||||
RetryImmediate = RetryPolicy{"immediate"}
|
||||
RetryBackoff = RetryPolicy{"backoff"}
|
||||
RetryAfter = RetryPolicy{"after"}
|
||||
RetryAfterFix = RetryPolicy{"after_fix"}
|
||||
RetryAfterAuth = RetryPolicy{"after_auth"}
|
||||
)
|
||||
|
||||
// Remediation names the single recommended next action. It does not execute.
|
||||
type Remediation struct{ s string }
|
||||
|
||||
func (r Remediation) String() string { return r.s }
|
||||
|
||||
var (
|
||||
RemediationNone = Remediation{"none"}
|
||||
RemediationFixInput = Remediation{"fix_input"}
|
||||
RemediationReauthenticate = Remediation{"reauthenticate"}
|
||||
RemediationWaitAndRetry = Remediation{"wait_and_retry"}
|
||||
RemediationFailover = Remediation{"failover"}
|
||||
RemediationContactOperator = Remediation{"contact_operator"}
|
||||
RemediationFileBug = Remediation{"file_bug"}
|
||||
RemediationUpgradeLicense = Remediation{"upgrade_license"}
|
||||
)
|
||||
|
||||
// RefKind classifies a reference URL attached to the error.
|
||||
type RefKind struct{ s string }
|
||||
|
||||
func (r RefKind) String() string { return r.s }
|
||||
|
||||
var (
|
||||
RefDocs = RefKind{"docs"}
|
||||
RefRunbook = RefKind{"runbook"}
|
||||
RefDashboard = RefKind{"dashboard"}
|
||||
RefTrace = RefKind{"trace"}
|
||||
RefSource = RefKind{"source"}
|
||||
RefIssue = RefKind{"issue"}
|
||||
)
|
||||
@@ -1,248 +0,0 @@
|
||||
// Package v2 is the redesigned pkg/errors.
|
||||
//
|
||||
// Every branch-worthy field on the Error struct is a closed enum and every
|
||||
// variable part is a typed key/value. The intent is to make errors first-class
|
||||
// data for programmatic consumers — SDK clients, UI surfaces, alerting, and
|
||||
// LLM agents — without sacrificing human readability.
|
||||
//
|
||||
// Domain and op are encoded into Code (e.g. "query.parse.unknown_function")
|
||||
// rather than carried as separate struct fields. Frames[0] is the
|
||||
// authoritative call-site location, captured at construction time.
|
||||
package v2
|
||||
|
||||
import (
|
||||
stderrors "errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Error is the redesigned error value. *Error is the canonical form passed
|
||||
// around — the zero value is unused, construct via New / Newf / Wrap / Wrapf.
|
||||
//
|
||||
// Frames are intentionally not a struct field: resolving captured PCs into
|
||||
// func/file/line is the dominant construction cost, so we capture PCs eagerly
|
||||
// at construction time (so the snapshot is faithful to the call site) and
|
||||
// resolve them lazily via Frames() only when something actually inspects them.
|
||||
type Error struct {
|
||||
// WHAT
|
||||
Category Category
|
||||
Code Code
|
||||
Title string
|
||||
Detail string
|
||||
|
||||
// WHY / WHO
|
||||
Cause error
|
||||
Fault Fault
|
||||
|
||||
// WHAT NEXT
|
||||
Retry Retry
|
||||
Remediation Remediation
|
||||
Refs map[RefKind]string
|
||||
|
||||
// CONTEXT
|
||||
Attrs map[string]any
|
||||
TraceID string
|
||||
SpanID string
|
||||
|
||||
// stack is the captured PCs plus a memoized []Frame; never read directly,
|
||||
// always go through Frames().
|
||||
stack *frameStack
|
||||
}
|
||||
|
||||
// Frames returns the captured stack, resolved to func/file/line on first
|
||||
// access. Frames[0] is the constructor's caller. Safe for concurrent use.
|
||||
func (e *Error) Frames() []Frame {
|
||||
if e == nil {
|
||||
return nil
|
||||
}
|
||||
return e.stack.frames()
|
||||
}
|
||||
|
||||
// New creates an Error for a registered Code. Defaults from the registered
|
||||
// Meta are applied first; opts override per call site.
|
||||
func New(code Code, title string, opts ...Option) *Error {
|
||||
e := &Error{Code: code, Title: title, stack: captureStack(3)}
|
||||
applyMeta(e)
|
||||
for _, opt := range opts {
|
||||
opt(e)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// Newf is New with fmt.Sprintf-style formatting for the title.
|
||||
func Newf(code Code, format string, args ...any) *Error {
|
||||
e := &Error{Code: code, Title: fmt.Sprintf(format, args...), stack: captureStack(3)}
|
||||
applyMeta(e)
|
||||
return e
|
||||
}
|
||||
|
||||
// Wrap creates an Error that wraps cause. The new error's Title is the
|
||||
// caller-supplied title (not the cause's message), so Error() reports what
|
||||
// went wrong at this layer — the cause is reachable via Unwrap.
|
||||
func Wrap(cause error, code Code, title string, opts ...Option) *Error {
|
||||
e := &Error{Code: code, Title: title, Cause: cause, stack: captureStack(3)}
|
||||
applyMeta(e)
|
||||
for _, opt := range opts {
|
||||
opt(e)
|
||||
}
|
||||
return e
|
||||
}
|
||||
|
||||
// Wrapf is Wrap with fmt.Sprintf-style formatting for the title.
|
||||
func Wrapf(cause error, code Code, format string, args ...any) *Error {
|
||||
e := &Error{Code: code, Title: fmt.Sprintf(format, args...), Cause: cause, stack: captureStack(3)}
|
||||
applyMeta(e)
|
||||
return e
|
||||
}
|
||||
|
||||
// applyMeta copies default values from the registered Meta into a fresh
|
||||
// Error. It runs before per-call options so options win.
|
||||
func applyMeta(e *Error) {
|
||||
meta, ok := MetaOf(e.Code)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
if (e.Category == Category{}) {
|
||||
e.Category = meta.Category
|
||||
}
|
||||
if (e.Fault == Fault{}) {
|
||||
e.Fault = meta.Fault
|
||||
}
|
||||
if (e.Retry == Retry{}) {
|
||||
e.Retry = meta.Retry
|
||||
}
|
||||
if (e.Remediation == Remediation{}) {
|
||||
e.Remediation = meta.Remediation
|
||||
}
|
||||
if len(meta.Refs) > 0 {
|
||||
if e.Refs == nil {
|
||||
e.Refs = make(map[RefKind]string, len(meta.Refs))
|
||||
}
|
||||
for k, v := range meta.Refs {
|
||||
if _, exists := e.Refs[k]; !exists {
|
||||
e.Refs[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Error returns the Title (the message specifically attached at this wrap
|
||||
// site), not the cause's message. This fixes the v1 surprise where Error()
|
||||
// returned the wrapped cause's text.
|
||||
func (e *Error) Error() string {
|
||||
if e == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
return e.Title
|
||||
}
|
||||
|
||||
// Unwrap returns the wrapped cause, enabling errors.Is / errors.As.
|
||||
func (e *Error) Unwrap() error {
|
||||
if e == nil {
|
||||
return nil
|
||||
}
|
||||
return e.Cause
|
||||
}
|
||||
|
||||
// Format implements fmt.Formatter.
|
||||
//
|
||||
// %s, %v → Title only
|
||||
// %+v → full chain: code, title, frames, attrs, recursive cause
|
||||
func (e *Error) Format(f fmt.State, verb rune) {
|
||||
switch verb {
|
||||
case 's':
|
||||
_, _ = io.WriteString(f, e.Title)
|
||||
case 'v':
|
||||
if f.Flag('+') {
|
||||
_, _ = io.WriteString(f, e.fullString())
|
||||
return
|
||||
}
|
||||
_, _ = io.WriteString(f, e.Title)
|
||||
case 'q':
|
||||
fmt.Fprintf(f, "%q", e.Title)
|
||||
default:
|
||||
fmt.Fprintf(f, "%%!%c(*errors/v2.Error)", verb)
|
||||
}
|
||||
}
|
||||
|
||||
// fullString produces the %+v rendering. Format is intentionally
|
||||
// human-readable rather than machine-parseable; consumers that want structure
|
||||
// should marshal to JSON.
|
||||
func (e *Error) fullString() string {
|
||||
var b strings.Builder
|
||||
e.appendFull(&b, 0)
|
||||
return b.String()
|
||||
}
|
||||
|
||||
func (e *Error) appendFull(b *strings.Builder, depth int) {
|
||||
indent := strings.Repeat(" ", depth)
|
||||
fmt.Fprintf(b, "%s[%s] %s\n", indent, e.Code.s, e.Title)
|
||||
if e.Detail != "" {
|
||||
fmt.Fprintf(b, "%s detail: %s\n", indent, e.Detail)
|
||||
}
|
||||
if len(e.Attrs) > 0 {
|
||||
// Stable key order for deterministic output.
|
||||
keys := make([]string, 0, len(e.Attrs))
|
||||
for k := range e.Attrs {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
sort.Strings(keys)
|
||||
fmt.Fprintf(b, "%s attrs:\n", indent)
|
||||
for _, k := range keys {
|
||||
fmt.Fprintf(b, "%s %s=%v\n", indent, k, e.Attrs[k])
|
||||
}
|
||||
}
|
||||
if frames := e.Frames(); len(frames) > 0 {
|
||||
fmt.Fprintf(b, "%s frames:\n", indent)
|
||||
for _, fr := range frames {
|
||||
fmt.Fprintf(b, "%s %s\n%s %s:%s\n", indent, fr.Func, indent, fr.File, strconv.Itoa(fr.Line))
|
||||
}
|
||||
}
|
||||
if e.Cause != nil {
|
||||
fmt.Fprintf(b, "%scaused by:\n", indent)
|
||||
var ce *Error
|
||||
if stderrors.As(e.Cause, &ce) && ce != nil {
|
||||
ce.appendFull(b, depth+1)
|
||||
} else {
|
||||
fmt.Fprintf(b, "%s %s\n", indent, e.Cause.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AsError extracts a *Error from anywhere in err's wrap chain. It is the
|
||||
// common shortcut around errors.As for code that always wants this package's
|
||||
// type.
|
||||
func AsError(err error) (*Error, bool) {
|
||||
if err == nil {
|
||||
return nil, false
|
||||
}
|
||||
var e *Error
|
||||
if stderrors.As(err, &e) {
|
||||
return e, true
|
||||
}
|
||||
return nil, false
|
||||
}
|
||||
|
||||
// Is reports whether err or any error in its chain has the given Code.
|
||||
// Convenience wrapper that's friendlier than errors.As at call sites that
|
||||
// only care about code identity.
|
||||
func Is(err error, code Code) bool {
|
||||
e, ok := AsError(err)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
for e != nil {
|
||||
if e.Code == code {
|
||||
return true
|
||||
}
|
||||
next, ok := AsError(e.Cause)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
e = next
|
||||
}
|
||||
return false
|
||||
}
|
||||
@@ -1,90 +0,0 @@
|
||||
package v2
|
||||
|
||||
// This file is a self-contained walkthrough of how a domain integrates with
|
||||
// pkg/errors/v2. It mirrors what a real pkg/<domain>/errors.go looks like in
|
||||
// practice — registering codes, constructing typed errors at failure sites,
|
||||
// and consuming them at API boundaries. The "example.*" namespace is reserved
|
||||
// for these demo codes so they never collide with a real domain's
|
||||
// registrations.
|
||||
|
||||
// 1. Register codes at package init time. Each Register call panics on
|
||||
// malformed code or duplicate registration, so misconfiguration is caught
|
||||
// at process boot, not at first failure.
|
||||
var (
|
||||
// A caller-fault, fix-the-input error: rejected before any work happens.
|
||||
exampleCodeInvalidQuery = Register("example.query.invalid_filter", Meta{
|
||||
Category: CategoryInvalidInput,
|
||||
Fault: FaultCaller,
|
||||
Remediation: RemediationFixInput,
|
||||
Retry: Retry{Policy: RetryAfterFix},
|
||||
Refs: map[RefKind]string{
|
||||
RefDocs: "https://signoz.io/docs/query/filters",
|
||||
},
|
||||
})
|
||||
|
||||
// A quota error: the caller's request was well-formed but their plan
|
||||
// doesn't allow it. The recommended remediation is structural (upgrade),
|
||||
// not "try again later."
|
||||
exampleCodeQuotaExceeded = Register("example.billing.quota_exceeded", Meta{
|
||||
Category: CategoryResourceExhausted,
|
||||
Fault: FaultCaller,
|
||||
Remediation: RemediationUpgradeLicense,
|
||||
Retry: Retry{Policy: RetryNever},
|
||||
})
|
||||
)
|
||||
|
||||
// 2. Construct errors at the failure site. Notice that variable parts of
|
||||
// the message (the offending field, the limits) live in typed Attrs, not in
|
||||
// the title prose — a downstream agent can read them without parsing English.
|
||||
func exampleRejectInvalidFilter(field string) *Error {
|
||||
return New(exampleCodeInvalidQuery, "filter is not supported",
|
||||
WithAttr("field", field),
|
||||
)
|
||||
}
|
||||
|
||||
// 3. Consume errors at the API boundary. Branching on Category gives the
|
||||
// HTTP status; Retry tells an SDK how to behave; Fault drives logging
|
||||
// classification (caller errors are warnings, server/upstream errors page).
|
||||
func exampleClassifyForHTTP(err error) (status int, retry RetryPolicy) {
|
||||
e, ok := AsError(err)
|
||||
if !ok {
|
||||
return 500, RetryNever
|
||||
}
|
||||
switch e.Category {
|
||||
case CategoryInvalidInput, CategoryPrecondition:
|
||||
status = 400
|
||||
case CategoryUnauthenticated:
|
||||
status = 401
|
||||
case CategoryForbidden:
|
||||
status = 403
|
||||
case CategoryNotFound:
|
||||
status = 404
|
||||
case CategoryConflict, CategoryAlreadyExists:
|
||||
status = 409
|
||||
case CategoryResourceExhausted:
|
||||
status = 429
|
||||
case CategoryUnavailable, CategoryTimeout:
|
||||
status = 503
|
||||
case CategoryUnimplemented:
|
||||
status = 501
|
||||
default:
|
||||
status = 500
|
||||
}
|
||||
return status, e.Retry.Policy
|
||||
}
|
||||
|
||||
// 4. Identify a specific failure mode by Code. Is walks the cause chain so
|
||||
// a wrapper at the HTTP layer still matches when the root cause was raised
|
||||
// deep in the call graph.
|
||||
func exampleIsQuotaExceeded(err error) bool {
|
||||
return Is(err, exampleCodeQuotaExceeded)
|
||||
}
|
||||
|
||||
// The example helpers are reference-only: they exist to document call-site
|
||||
// patterns, not to be called from anywhere in the binary. This anchor keeps
|
||||
// them visible to readers (and the linter) without exporting demo code.
|
||||
var _ = []any{
|
||||
exampleRejectInvalidFilter,
|
||||
exampleClassifyForHTTP,
|
||||
exampleIsQuotaExceeded,
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
package v2
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Frame is a single line in the call stack. Frames[0] is the constructor's
|
||||
// caller — the authoritative "where this error came from" — and downstream
|
||||
// consumers can filter (e.g. "frames inside our code") without regex
|
||||
// reparsing of a pre-formatted stack string.
|
||||
type Frame struct {
|
||||
Func string `json:"func,omitempty"`
|
||||
File string `json:"file,omitempty"`
|
||||
Line int `json:"line,omitempty"`
|
||||
}
|
||||
|
||||
// frameStack carries the PCs captured at construction plus the resolved
|
||||
// []Frame slice, behind a sync.Once. Resolving frames into func/file/line is
|
||||
// expensive (runtime.CallersFrames walks the symbol table); the vast majority
|
||||
// of errors are constructed and never inspected, so we only pay that cost
|
||||
// when a consumer actually asks for frames (Frames()/MarshalJSON/%+v).
|
||||
//
|
||||
// The PC capture itself is cheap and happens at construction so that
|
||||
// Frames[0] is a faithful "where" record of the original call site.
|
||||
type frameStack struct {
|
||||
pcs []uintptr
|
||||
|
||||
once sync.Once
|
||||
resolved []Frame
|
||||
}
|
||||
|
||||
// captureStack is called by every constructor. skip drops runtime.Callers,
|
||||
// captureStack itself, and the constructor frame so that the first PC is the
|
||||
// user code that invoked the constructor.
|
||||
func captureStack(skip int) *frameStack {
|
||||
const depth = 32
|
||||
pcs := make([]uintptr, depth)
|
||||
n := runtime.Callers(skip, pcs)
|
||||
if n == 0 {
|
||||
return nil
|
||||
}
|
||||
return &frameStack{pcs: pcs[:n:n]}
|
||||
}
|
||||
|
||||
// frames resolves the captured PCs into []Frame. The resolution is memoized
|
||||
// — concurrent calls are safe and only one of them does the work.
|
||||
func (s *frameStack) frames() []Frame {
|
||||
if s == nil {
|
||||
return nil
|
||||
}
|
||||
s.once.Do(func() {
|
||||
cf := runtime.CallersFrames(s.pcs)
|
||||
out := make([]Frame, 0, len(s.pcs))
|
||||
for {
|
||||
f, more := cf.Next()
|
||||
out = append(out, Frame{Func: f.Function, File: f.File, Line: f.Line})
|
||||
if !more {
|
||||
break
|
||||
}
|
||||
}
|
||||
s.resolved = out
|
||||
})
|
||||
return s.resolved
|
||||
}
|
||||
@@ -1,175 +0,0 @@
|
||||
package v2
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"net/url"
|
||||
)
|
||||
|
||||
// CodeUnknown is the sentinel returned when AsJSON / AsURLValues are called
|
||||
// on a non-*Error. A consumer that sees this on the wire should read it as
|
||||
// "the producer did not raise a v2 Error and we projected it through the
|
||||
// fallback path" — i.e. somewhere upstream is still using std errors or v1.
|
||||
var CodeUnknown = Register("unknown.unset", Meta{
|
||||
Category: CategoryInternal,
|
||||
Fault: FaultServer,
|
||||
Retry: Retry{Policy: RetryNever},
|
||||
})
|
||||
|
||||
// JSON is the wire envelope for an Error. It is intentionally a superset of
|
||||
// v1's pkg/errors.JSON: SDK clients that only read v1's {code, message, url,
|
||||
// errors[]} keep working, while v2 consumers can branch on the new typed
|
||||
// fields (category, fault, retry, remediation, attrs, refs, cause).
|
||||
type JSON struct {
|
||||
Code string `json:"code" required:"true"`
|
||||
Title string `json:"title" required:"true"`
|
||||
Detail string `json:"detail,omitempty"`
|
||||
Category string `json:"category,omitempty"`
|
||||
Fault string `json:"fault,omitempty"`
|
||||
Retry *RetryJSON `json:"retry,omitempty"`
|
||||
Remediation string `json:"remediation,omitempty"`
|
||||
Attrs map[string]any `json:"attrs,omitempty"`
|
||||
Refs map[string]string `json:"refs,omitempty"`
|
||||
Frames []Frame `json:"frames,omitempty"`
|
||||
TraceID string `json:"trace_id,omitempty"`
|
||||
SpanID string `json:"span_id,omitempty"`
|
||||
Cause *CauseJSON `json:"cause,omitempty"`
|
||||
}
|
||||
|
||||
// RetryJSON renders Retry as an object so consumers can branch on policy
|
||||
// before consulting AfterMS. AfterMS is omitted unless policy is "after".
|
||||
type RetryJSON struct {
|
||||
Policy string `json:"policy"`
|
||||
AfterMS int64 `json:"after_ms,omitempty"`
|
||||
}
|
||||
|
||||
// CauseJSON is the thin recursive shape for a cause chain. Only code, title,
|
||||
// and a nested cause are guaranteed — producers may add more, consumers must
|
||||
// not rely on it.
|
||||
type CauseJSON struct {
|
||||
Code string `json:"code,omitempty"`
|
||||
Title string `json:"title"`
|
||||
Cause *CauseJSON `json:"cause,omitempty"`
|
||||
}
|
||||
|
||||
// AsJSON projects any error onto the v2 wire envelope. If cause is a
|
||||
// *Error (anywhere in its wrap chain) every field is filled from it;
|
||||
// otherwise the result is a CodeUnknown envelope with Title=cause.Error()
|
||||
// so the wire shape is always valid and never panics.
|
||||
func AsJSON(cause error) *JSON {
|
||||
if cause == nil {
|
||||
return nil
|
||||
}
|
||||
e, ok := AsError(cause)
|
||||
if !ok {
|
||||
return &JSON{
|
||||
Code: CodeUnknown.s,
|
||||
Title: cause.Error(),
|
||||
Category: CategoryInternal.s,
|
||||
Fault: FaultServer.s,
|
||||
}
|
||||
}
|
||||
return errorToJSON(e)
|
||||
}
|
||||
|
||||
func errorToJSON(e *Error) *JSON {
|
||||
out := &JSON{
|
||||
Code: e.Code.s,
|
||||
Title: e.Title,
|
||||
Detail: e.Detail,
|
||||
Category: e.Category.s,
|
||||
Fault: e.Fault.s,
|
||||
Remediation: e.Remediation.s,
|
||||
Attrs: e.Attrs,
|
||||
TraceID: e.TraceID,
|
||||
SpanID: e.SpanID,
|
||||
}
|
||||
if (e.Retry.Policy != RetryPolicy{}) {
|
||||
out.Retry = &RetryJSON{Policy: e.Retry.Policy.s}
|
||||
if e.Retry.Policy == RetryAfter && e.Retry.After > 0 {
|
||||
out.Retry.AfterMS = e.Retry.After.Milliseconds()
|
||||
}
|
||||
}
|
||||
if len(e.Refs) > 0 {
|
||||
out.Refs = make(map[string]string, len(e.Refs))
|
||||
for k, v := range e.Refs {
|
||||
out.Refs[k.s] = v
|
||||
}
|
||||
}
|
||||
if frames := e.Frames(); len(frames) > 0 {
|
||||
out.Frames = frames
|
||||
}
|
||||
if e.Cause != nil {
|
||||
out.Cause = causeToJSON(e.Cause)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func causeToJSON(err error) *CauseJSON {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
if e, ok := err.(*Error); ok {
|
||||
c := &CauseJSON{Code: e.Code.s, Title: e.Title}
|
||||
if e.Cause != nil {
|
||||
c.Cause = causeToJSON(e.Cause)
|
||||
}
|
||||
return c
|
||||
}
|
||||
// Non-*Error leaf: only Title is set, no Code.
|
||||
return &CauseJSON{Title: err.Error()}
|
||||
}
|
||||
|
||||
// AsURLValues projects an error onto a flat url.Values, matching v1's shape
|
||||
// for callers (e.g. OAuth/SSO redirects) that smuggle errors back through a
|
||||
// query string. Complex fields (attrs, refs, retry, frames, cause) are
|
||||
// JSON-marshaled into a single value rather than spread across multiple
|
||||
// keys, since query strings have no good representation for nested data.
|
||||
func AsURLValues(cause error) url.Values {
|
||||
j := AsJSON(cause)
|
||||
if j == nil {
|
||||
return url.Values{}
|
||||
}
|
||||
v := url.Values{
|
||||
"code": {j.Code},
|
||||
"title": {j.Title},
|
||||
}
|
||||
if j.Detail != "" {
|
||||
v.Set("detail", j.Detail)
|
||||
}
|
||||
if j.Category != "" {
|
||||
v.Set("category", j.Category)
|
||||
}
|
||||
if j.Fault != "" {
|
||||
v.Set("fault", j.Fault)
|
||||
}
|
||||
if j.Remediation != "" {
|
||||
v.Set("remediation", j.Remediation)
|
||||
}
|
||||
if j.TraceID != "" {
|
||||
v.Set("trace_id", j.TraceID)
|
||||
}
|
||||
if j.SpanID != "" {
|
||||
v.Set("span_id", j.SpanID)
|
||||
}
|
||||
if j.Retry != nil {
|
||||
if b, err := json.Marshal(j.Retry); err == nil {
|
||||
v.Set("retry", string(b))
|
||||
}
|
||||
}
|
||||
if len(j.Refs) > 0 {
|
||||
if b, err := json.Marshal(j.Refs); err == nil {
|
||||
v.Set("refs", string(b))
|
||||
}
|
||||
}
|
||||
if len(j.Attrs) > 0 {
|
||||
if b, err := json.Marshal(j.Attrs); err == nil {
|
||||
v.Set("attrs", string(b))
|
||||
}
|
||||
}
|
||||
if j.Cause != nil {
|
||||
if b, err := json.Marshal(j.Cause); err == nil {
|
||||
v.Set("cause", string(b))
|
||||
}
|
||||
}
|
||||
return v
|
||||
}
|
||||
@@ -1,85 +0,0 @@
|
||||
package v2
|
||||
|
||||
import "time"
|
||||
|
||||
// Option mutates an Error during construction. Options are applied after the
|
||||
// registered Meta defaults so a per-call WithFault wins over the code's
|
||||
// default Fault.
|
||||
type Option func(*Error)
|
||||
|
||||
// WithTitle overrides the title (used when Newf's formatted string is not
|
||||
// what you want, or after a Wrap that took its title from the cause).
|
||||
func WithTitle(s string) Option { return func(e *Error) { e.Title = s } }
|
||||
|
||||
// WithDetail adds a long, user-safe explanation. Detail must never include
|
||||
// raw cause text; the cause is already in the chain.
|
||||
func WithDetail(s string) Option { return func(e *Error) { e.Detail = s } }
|
||||
|
||||
// WithCategory overrides the registered Category.
|
||||
func WithCategory(c Category) Option { return func(e *Error) { e.Category = c } }
|
||||
|
||||
// WithFault overrides the registered Fault.
|
||||
func WithFault(f Fault) Option { return func(e *Error) { e.Fault = f } }
|
||||
|
||||
// WithRetry overrides the registered Retry.
|
||||
func WithRetry(r Retry) Option { return func(e *Error) { e.Retry = r } }
|
||||
|
||||
// WithRetryAfter is a convenience for the common RetryAfter case.
|
||||
func WithRetryAfter(d time.Duration) Option {
|
||||
return func(e *Error) { e.Retry = Retry{Policy: RetryAfter, After: d} }
|
||||
}
|
||||
|
||||
// WithRemediation overrides the registered Remediation.
|
||||
//
|
||||
// Convention for "did you mean" hints: stash a []string under
|
||||
// Attrs["suggestions"], ranked best-first. Each element should be a complete,
|
||||
// copy-pasteable replacement — not an explanation of what went wrong (use
|
||||
// WithDetail for that). Once 3-4 domains adopt the convention identically,
|
||||
// promote to a first-class field.
|
||||
func WithRemediation(r Remediation) Option { return func(e *Error) { e.Remediation = r } }
|
||||
|
||||
// WithRef adds (or replaces) a single reference URL keyed by kind.
|
||||
func WithRef(kind RefKind, url string) Option {
|
||||
return func(e *Error) {
|
||||
if e.Refs == nil {
|
||||
e.Refs = make(map[RefKind]string, 1)
|
||||
}
|
||||
e.Refs[kind] = url
|
||||
}
|
||||
}
|
||||
|
||||
// WithAttr sets a single typed attribute. Prefer typed per-domain helpers
|
||||
// (e.g. WithQueryAttrs(q Query)) over raw WithAttr at call sites — they keep
|
||||
// the attr keys consistent and let the compiler reject typos.
|
||||
func WithAttr(key string, value any) Option {
|
||||
return func(e *Error) {
|
||||
if e.Attrs == nil {
|
||||
e.Attrs = make(map[string]any, 1)
|
||||
}
|
||||
e.Attrs[key] = value
|
||||
}
|
||||
}
|
||||
|
||||
// WithAttrs merges a map of attributes; later keys win.
|
||||
func WithAttrs(attrs map[string]any) Option {
|
||||
return func(e *Error) {
|
||||
if len(attrs) == 0 {
|
||||
return
|
||||
}
|
||||
if e.Attrs == nil {
|
||||
e.Attrs = make(map[string]any, len(attrs))
|
||||
}
|
||||
for k, v := range attrs {
|
||||
e.Attrs[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// WithTrace stamps the error with OTel trace and span IDs so the JSON
|
||||
// response can link back to the originating span.
|
||||
func WithTrace(traceID, spanID string) Option {
|
||||
return func(e *Error) {
|
||||
e.TraceID = traceID
|
||||
e.SpanID = spanID
|
||||
}
|
||||
}
|
||||
@@ -69,3 +69,27 @@ func (h *handler) ListPods(rw http.ResponseWriter, req *http.Request) {
|
||||
|
||||
render.Success(rw, http.StatusOK, result)
|
||||
}
|
||||
|
||||
func (h *handler) ListNodes(rw http.ResponseWriter, req *http.Request) {
|
||||
claims, err := authtypes.ClaimsFromContext(req.Context())
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
orgID := valuer.MustNewUUID(claims.OrgID)
|
||||
|
||||
var parsedReq inframonitoringtypes.PostableNodes
|
||||
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.module.ListNodes(req.Context(), orgID, &parsedReq)
|
||||
if err != nil {
|
||||
render.Error(rw, err)
|
||||
return
|
||||
}
|
||||
|
||||
render.Success(rw, http.StatusOK, result)
|
||||
}
|
||||
|
||||
@@ -174,7 +174,7 @@ func buildHostRecords(
|
||||
Wait: -1,
|
||||
Load15: -1,
|
||||
DiskUsage: -1,
|
||||
Meta: map[string]any{},
|
||||
Meta: map[string]string{},
|
||||
}
|
||||
|
||||
if metrics, ok := metricsMap[compositeKey]; ok {
|
||||
|
||||
@@ -23,3 +23,9 @@ type podPhaseCounts struct {
|
||||
Failed int
|
||||
Unknown int
|
||||
}
|
||||
|
||||
// nodeConditionCounts holds per-group node counts bucketed by latest condition_ready in window.
|
||||
type nodeConditionCounts struct {
|
||||
Ready int
|
||||
NotReady int
|
||||
}
|
||||
|
||||
@@ -242,3 +242,98 @@ func (m *module) ListPods(ctx context.Context, orgID valuer.UUID, req *inframoni
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
func (m *module) ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error) {
|
||||
if err := req.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp := &inframonitoringtypes.Nodes{}
|
||||
|
||||
if req.OrderBy == nil {
|
||||
req.OrderBy = &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: inframonitoringtypes.NodesOrderByCPU,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
}
|
||||
}
|
||||
|
||||
if len(req.GroupBy) == 0 {
|
||||
req.GroupBy = []qbtypes.GroupByKey{nodeNameGroupByKey}
|
||||
resp.Type = inframonitoringtypes.ResponseTypeList
|
||||
} else {
|
||||
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
||||
}
|
||||
|
||||
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, nodesTableMetricNamesList)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if len(missingMetrics) > 0 {
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
||||
resp.Records = []inframonitoringtypes.NodeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
if req.End < int64(minFirstReportedUnixMilli) {
|
||||
resp.EndTimeBeforeRetention = true
|
||||
resp.Records = []inframonitoringtypes.NodeRecord{}
|
||||
resp.Total = 0
|
||||
return resp, nil
|
||||
}
|
||||
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
||||
|
||||
metadataMap, err := m.getNodesTableMetadata(ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
resp.Total = len(metadataMap)
|
||||
|
||||
pageGroups, err := m.getTopNodeGroups(ctx, orgID, req, metadataMap)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if len(pageGroups) == 0 {
|
||||
resp.Records = []inframonitoringtypes.NodeRecord{}
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
if req.Filter != nil {
|
||||
filterExpr = req.Filter.Expression
|
||||
}
|
||||
|
||||
fullQueryReq := buildFullQueryRequest(req.Start, req.End, filterExpr, req.GroupBy, pageGroups, m.newNodesTableListQuery())
|
||||
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nodeConditionCounts, err := m.getPerGroupNodeConditionCounts(ctx, req, pageGroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Reuse the pods phase-counts CTE function via a temp struct — it reads only
|
||||
// Start/End/Filter/GroupBy from PostablePods.
|
||||
podPhaseCounts, err := m.getPerGroupPodPhaseCounts(ctx, &inframonitoringtypes.PostablePods{
|
||||
Start: req.Start,
|
||||
End: req.End,
|
||||
Filter: req.Filter,
|
||||
GroupBy: req.GroupBy,
|
||||
}, pageGroups)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
isNodeNameInGroupBy := isKeyInGroupByAttrs(req.GroupBy, nodeNameAttrKey)
|
||||
resp.Records = buildNodeRecords(isNodeNameInGroupBy, queryResp, pageGroups, req.GroupBy, metadataMap, nodeConditionCounts, podPhaseCounts)
|
||||
resp.Warning = queryResp.Warning
|
||||
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
312
pkg/modules/inframonitoring/implinframonitoring/nodes.go
Normal file
312
pkg/modules/inframonitoring/implinframonitoring/nodes.go
Normal file
@@ -0,0 +1,312 @@
|
||||
package implinframonitoring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"strings"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/querybuilder"
|
||||
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
|
||||
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/huandu/go-sqlbuilder"
|
||||
)
|
||||
|
||||
// buildNodeRecords assembles the page records. Condition counts come from
|
||||
// conditionCounts in both modes. In list mode (isNodeNameInGroupBy=true) each
|
||||
// group is one node, so exactly one count is 1; Condition is derived from
|
||||
// which one. In grouped_list mode Condition stays NodeConditionNoData.
|
||||
func buildNodeRecords(
|
||||
isNodeNameInGroupBy bool,
|
||||
resp *qbtypes.QueryRangeResponse,
|
||||
pageGroups []map[string]string,
|
||||
groupBy []qbtypes.GroupByKey,
|
||||
metadataMap map[string]map[string]string,
|
||||
nodeConditionCounts map[string]nodeConditionCounts,
|
||||
podPhaseCounts map[string]podPhaseCounts,
|
||||
) []inframonitoringtypes.NodeRecord {
|
||||
metricsMap := parseFullQueryResponse(resp, groupBy)
|
||||
|
||||
records := make([]inframonitoringtypes.NodeRecord, 0, len(pageGroups))
|
||||
for _, labels := range pageGroups {
|
||||
compositeKey := compositeKeyFromLabels(labels, groupBy)
|
||||
nodeName := labels[nodeNameAttrKey]
|
||||
|
||||
record := inframonitoringtypes.NodeRecord{ // initialize with default values
|
||||
NodeName: nodeName,
|
||||
Condition: inframonitoringtypes.NodeConditionNoData,
|
||||
NodeCPU: -1,
|
||||
NodeCPUAllocatable: -1,
|
||||
NodeMemory: -1,
|
||||
NodeMemoryAllocatable: -1,
|
||||
Meta: map[string]string{},
|
||||
}
|
||||
|
||||
if metrics, ok := metricsMap[compositeKey]; ok {
|
||||
if v, exists := metrics["A"]; exists {
|
||||
record.NodeCPU = v
|
||||
}
|
||||
if v, exists := metrics["B"]; exists {
|
||||
record.NodeCPUAllocatable = v
|
||||
}
|
||||
if v, exists := metrics["C"]; exists {
|
||||
record.NodeMemory = v
|
||||
}
|
||||
if v, exists := metrics["D"]; exists {
|
||||
record.NodeMemoryAllocatable = v
|
||||
}
|
||||
}
|
||||
|
||||
if nodeConditionCountsForGroup, ok := nodeConditionCounts[compositeKey]; ok {
|
||||
record.NodeCountsByReadiness = inframonitoringtypes.NodeCountsByReadiness{
|
||||
Ready: nodeConditionCountsForGroup.Ready,
|
||||
NotReady: nodeConditionCountsForGroup.NotReady,
|
||||
}
|
||||
|
||||
// In list mode each group is one node; the count==1 bucket identifies the condition.
|
||||
if isNodeNameInGroupBy {
|
||||
switch {
|
||||
case nodeConditionCountsForGroup.Ready == 1:
|
||||
record.Condition = inframonitoringtypes.NodeConditionReady
|
||||
case nodeConditionCountsForGroup.NotReady == 1:
|
||||
record.Condition = inframonitoringtypes.NodeConditionNotReady
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if podPhaseCountsForGroup, ok := podPhaseCounts[compositeKey]; ok {
|
||||
record.PodCountsByPhase = inframonitoringtypes.PodCountsByPhase{
|
||||
Pending: podPhaseCountsForGroup.Pending,
|
||||
Running: podPhaseCountsForGroup.Running,
|
||||
Succeeded: podPhaseCountsForGroup.Succeeded,
|
||||
Failed: podPhaseCountsForGroup.Failed,
|
||||
Unknown: podPhaseCountsForGroup.Unknown,
|
||||
}
|
||||
}
|
||||
|
||||
if attrs, ok := metadataMap[compositeKey]; ok {
|
||||
for k, v := range attrs {
|
||||
record.Meta[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
records = append(records, record)
|
||||
}
|
||||
return records
|
||||
}
|
||||
|
||||
func (m *module) getTopNodeGroups(
|
||||
ctx context.Context,
|
||||
orgID valuer.UUID,
|
||||
req *inframonitoringtypes.PostableNodes,
|
||||
metadataMap map[string]map[string]string,
|
||||
) ([]map[string]string, error) {
|
||||
orderByKey := req.OrderBy.Key.Name
|
||||
queryNamesForOrderBy := orderByToNodesQueryNames[orderByKey]
|
||||
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
|
||||
|
||||
topReq := &qbtypes.QueryRangeRequest{
|
||||
Start: uint64(req.Start),
|
||||
End: uint64(req.End),
|
||||
RequestType: qbtypes.RequestTypeScalar,
|
||||
CompositeQuery: qbtypes.CompositeQuery{
|
||||
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
|
||||
},
|
||||
}
|
||||
|
||||
for _, envelope := range m.newNodesTableListQuery().CompositeQuery.Queries {
|
||||
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
|
||||
continue
|
||||
}
|
||||
copied := envelope
|
||||
if copied.Type == qbtypes.QueryTypeBuilder {
|
||||
existingExpr := ""
|
||||
if f := copied.GetFilter(); f != nil {
|
||||
existingExpr = f.Expression
|
||||
}
|
||||
reqFilterExpr := ""
|
||||
if req.Filter != nil {
|
||||
reqFilterExpr = req.Filter.Expression
|
||||
}
|
||||
merged := mergeFilterExpressions(existingExpr, reqFilterExpr)
|
||||
copied.SetFilter(&qbtypes.Filter{Expression: merged})
|
||||
copied.SetGroupBy(req.GroupBy)
|
||||
}
|
||||
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
|
||||
}
|
||||
|
||||
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
|
||||
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
|
||||
}
|
||||
|
||||
func (m *module) getNodesTableMetadata(ctx context.Context, req *inframonitoringtypes.PostableNodes) (map[string]map[string]string, error) {
|
||||
var nonGroupByAttrs []string
|
||||
for _, key := range nodeAttrKeysForMetadata {
|
||||
if !isKeyInGroupByAttrs(req.GroupBy, key) {
|
||||
nonGroupByAttrs = append(nonGroupByAttrs, key)
|
||||
}
|
||||
}
|
||||
return m.getMetadata(ctx, nodesTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
|
||||
}
|
||||
|
||||
// getPerGroupNodeConditionCounts computes per-group node counts bucketed by each
|
||||
// node's latest condition_ready value (0 / 1) in the requested window.
|
||||
// Pipeline:
|
||||
//
|
||||
// timeSeriesFPs: fp ↔ (node_name, groupBy cols) from the time_series table.
|
||||
// User filter + page-groups filter applied here.
|
||||
// latestConditionPerNode: INNER JOIN samples × timeSeriesFPs, collapsed to
|
||||
// the latest condition value per node via argMax(value, unix_milli).
|
||||
// countNodesPerCondition: per-group uniqExactIf into ready/not_ready buckets.
|
||||
//
|
||||
// Groups absent from the result map have implicit zero counts (caller default).
|
||||
func (m *module) getPerGroupNodeConditionCounts(
|
||||
ctx context.Context,
|
||||
req *inframonitoringtypes.PostableNodes,
|
||||
pageGroups []map[string]string,
|
||||
) (map[string]nodeConditionCounts, error) {
|
||||
if len(pageGroups) == 0 || len(req.GroupBy) == 0 {
|
||||
return map[string]nodeConditionCounts{}, nil
|
||||
}
|
||||
|
||||
// Merged filter expression (user filter + page-groups IN clauses).
|
||||
reqFilterExpr := ""
|
||||
if req.Filter != nil {
|
||||
reqFilterExpr = req.Filter.Expression
|
||||
}
|
||||
pageGroupsFilterExpr := buildPageGroupsFilterExpr(pageGroups)
|
||||
filterExpr := mergeFilterExpressions(reqFilterExpr, pageGroupsFilterExpr)
|
||||
|
||||
// Resolve tables. Same convention as pods.
|
||||
adjustedStart, adjustedEnd, _, localTimeSeriesTable := telemetrymetrics.WhichTSTableToUse(
|
||||
uint64(req.Start), uint64(req.End), nil,
|
||||
)
|
||||
samplesTable := telemetrymetrics.WhichSamplesTableToUse(
|
||||
uint64(req.Start), uint64(req.End),
|
||||
metrictypes.UnspecifiedType, metrictypes.TimeAggregationUnspecified, nil,
|
||||
)
|
||||
valueCol := telemetrymetrics.ValueColumnForSamplesTable(samplesTable)
|
||||
|
||||
// ----- timeSeriesFPs -----
|
||||
timeSeriesFPs := sqlbuilder.NewSelectBuilder()
|
||||
timeSeriesFPsSelectCols := []string{
|
||||
"fingerprint",
|
||||
fmt.Sprintf("JSONExtractString(labels, %s) AS node_name", timeSeriesFPs.Var(nodeNameAttrKey)),
|
||||
}
|
||||
for _, key := range req.GroupBy {
|
||||
timeSeriesFPsSelectCols = append(timeSeriesFPsSelectCols,
|
||||
fmt.Sprintf("JSONExtractString(labels, %s) AS %s", timeSeriesFPs.Var(key.Name), quoteIdentifier(key.Name)),
|
||||
)
|
||||
}
|
||||
timeSeriesFPs.Select(timeSeriesFPsSelectCols...)
|
||||
timeSeriesFPs.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, localTimeSeriesTable))
|
||||
timeSeriesFPs.Where(
|
||||
timeSeriesFPs.E("metric_name", nodeConditionMetricName),
|
||||
timeSeriesFPs.GE("unix_milli", adjustedStart),
|
||||
timeSeriesFPs.L("unix_milli", adjustedEnd),
|
||||
)
|
||||
if filterExpr != "" {
|
||||
filterClause, err := m.buildFilterClause(ctx, &qbtypes.Filter{Expression: filterExpr}, req.Start, req.End)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if filterClause != nil {
|
||||
timeSeriesFPs.AddWhereClause(filterClause)
|
||||
}
|
||||
}
|
||||
timeSeriesFPsGroupBy := []string{"fingerprint", "node_name"}
|
||||
for _, key := range req.GroupBy {
|
||||
timeSeriesFPsGroupBy = append(timeSeriesFPsGroupBy, quoteIdentifier(key.Name))
|
||||
}
|
||||
timeSeriesFPs.GroupBy(timeSeriesFPsGroupBy...)
|
||||
timeSeriesFPsSQL, timeSeriesFPsArgs := timeSeriesFPs.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
|
||||
// ----- latestConditionPerNode -----
|
||||
latestConditionPerNode := sqlbuilder.NewSelectBuilder()
|
||||
latestConditionPerNodeSelectCols := []string{"tsfp.node_name AS node_name"}
|
||||
latestConditionPerNodeGroupBy := []string{"node_name"}
|
||||
for _, key := range req.GroupBy {
|
||||
col := quoteIdentifier(key.Name)
|
||||
latestConditionPerNodeSelectCols = append(latestConditionPerNodeSelectCols, fmt.Sprintf("tsfp.%s AS %s", col, col))
|
||||
latestConditionPerNodeGroupBy = append(latestConditionPerNodeGroupBy, col)
|
||||
}
|
||||
latestConditionPerNodeSelectCols = append(latestConditionPerNodeSelectCols,
|
||||
fmt.Sprintf("argMax(samples.%s, samples.unix_milli) AS condition_value", valueCol),
|
||||
)
|
||||
latestConditionPerNode.Select(latestConditionPerNodeSelectCols...)
|
||||
latestConditionPerNode.From(fmt.Sprintf(
|
||||
"%s.%s AS samples INNER JOIN time_series_fps AS tsfp ON samples.fingerprint = tsfp.fingerprint",
|
||||
telemetrymetrics.DBName, samplesTable,
|
||||
))
|
||||
latestConditionPerNode.Where(
|
||||
latestConditionPerNode.E("samples.metric_name", nodeConditionMetricName),
|
||||
latestConditionPerNode.GE("samples.unix_milli", req.Start),
|
||||
latestConditionPerNode.L("samples.unix_milli", req.End),
|
||||
"tsfp.node_name != ''",
|
||||
)
|
||||
latestConditionPerNode.GroupBy(latestConditionPerNodeGroupBy...)
|
||||
latestConditionPerNodeSQL, latestConditionPerNodeArgs := latestConditionPerNode.BuildWithFlavor(sqlbuilder.ClickHouse)
|
||||
|
||||
// ----- countNodesPerCondition (outer SELECT) -----
|
||||
countNodesPerConditionSelectCols := make([]string, 0, len(req.GroupBy)+2)
|
||||
countNodesPerConditionGroupBy := make([]string, 0, len(req.GroupBy))
|
||||
for _, key := range req.GroupBy {
|
||||
col := quoteIdentifier(key.Name)
|
||||
countNodesPerConditionSelectCols = append(countNodesPerConditionSelectCols, col)
|
||||
countNodesPerConditionGroupBy = append(countNodesPerConditionGroupBy, col)
|
||||
}
|
||||
countNodesPerConditionSelectCols = append(countNodesPerConditionSelectCols,
|
||||
fmt.Sprintf("uniqExactIf(node_name, condition_value = %d) AS ready_count", inframonitoringtypes.NodeConditionNumReady),
|
||||
fmt.Sprintf("uniqExactIf(node_name, condition_value = %d) AS not_ready_count", inframonitoringtypes.NodeConditionNumNotReady),
|
||||
)
|
||||
countNodesPerConditionSQL := fmt.Sprintf(
|
||||
"SELECT %s FROM latest_condition_per_node GROUP BY %s",
|
||||
strings.Join(countNodesPerConditionSelectCols, ", "),
|
||||
strings.Join(countNodesPerConditionGroupBy, ", "),
|
||||
)
|
||||
|
||||
// Combine CTEs + outer.
|
||||
cteFragments := []string{
|
||||
fmt.Sprintf("time_series_fps AS (%s)", timeSeriesFPsSQL),
|
||||
fmt.Sprintf("latest_condition_per_node AS (%s)", latestConditionPerNodeSQL),
|
||||
}
|
||||
finalSQL := querybuilder.CombineCTEs(cteFragments) + countNodesPerConditionSQL
|
||||
finalArgs := querybuilder.PrependArgs([][]any{timeSeriesFPsArgs, latestConditionPerNodeArgs}, nil)
|
||||
|
||||
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, finalSQL, finalArgs...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
result := make(map[string]nodeConditionCounts)
|
||||
for rows.Next() {
|
||||
groupVals := make([]string, len(req.GroupBy))
|
||||
scanPtrs := make([]any, 0, len(req.GroupBy)+2)
|
||||
for i := range groupVals {
|
||||
scanPtrs = append(scanPtrs, &groupVals[i])
|
||||
}
|
||||
var ready, notReady uint64
|
||||
scanPtrs = append(scanPtrs, &ready, ¬Ready)
|
||||
|
||||
if err := rows.Scan(scanPtrs...); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
result[compositeKeyFromList(groupVals)] = nodeConditionCounts{
|
||||
Ready: int(ready),
|
||||
NotReady: int(notReady),
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return result, nil
|
||||
}
|
||||
@@ -0,0 +1,133 @@
|
||||
package implinframonitoring
|
||||
|
||||
import (
|
||||
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
|
||||
"github.com/SigNoz/signoz/pkg/types/metrictypes"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
)
|
||||
|
||||
const (
|
||||
nodeNameAttrKey = "k8s.node.name"
|
||||
nodeConditionMetricName = "k8s.node.condition_ready"
|
||||
)
|
||||
|
||||
var nodeNameGroupByKey = qbtypes.GroupByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: nodeNameAttrKey,
|
||||
FieldContext: telemetrytypes.FieldContextResource,
|
||||
FieldDataType: telemetrytypes.FieldDataTypeString,
|
||||
},
|
||||
}
|
||||
|
||||
// nodesTableMetricNamesList drives the existence/retention check.
|
||||
// Includes condition_ready and pod.phase also.
|
||||
var nodesTableMetricNamesList = []string{
|
||||
"k8s.node.cpu.usage",
|
||||
"k8s.node.allocatable_cpu",
|
||||
"k8s.node.memory.working_set",
|
||||
"k8s.node.allocatable_memory",
|
||||
"k8s.node.condition_ready",
|
||||
"k8s.pod.phase",
|
||||
}
|
||||
|
||||
var nodeAttrKeysForMetadata = []string{
|
||||
"k8s.node.uid",
|
||||
"k8s.cluster.name",
|
||||
}
|
||||
|
||||
var orderByToNodesQueryNames = map[string][]string{
|
||||
inframonitoringtypes.NodesOrderByCPU: {"A"},
|
||||
inframonitoringtypes.NodesOrderByCPUAllocatable: {"B"},
|
||||
inframonitoringtypes.NodesOrderByMemory: {"C"},
|
||||
inframonitoringtypes.NodesOrderByMemoryAllocatable: {"D"},
|
||||
}
|
||||
|
||||
// newNodesTableListQuery builds the composite QB v5 request for the nodes list.
|
||||
// Node condition is derived separately via getPerGroupNodeConditionCounts (works
|
||||
// for both list and grouped_list modes), so no condition query is included here.
|
||||
func (m *module) newNodesTableListQuery() *qbtypes.QueryRangeRequest {
|
||||
queries := []qbtypes.QueryEnvelope{
|
||||
// Query A: CPU usage
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "A",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.node.cpu.usage",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query B: CPU allocatable.
|
||||
// TimeAggregationLatest is the closest v5 equivalent of v1's AnyLast;
|
||||
// allocatable values change rarely so divergence in practice is negligible.
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "B",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.node.allocatable_cpu",
|
||||
TimeAggregation: metrictypes.TimeAggregationLatest,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query C: Memory working set
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "C",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.node.memory.working_set",
|
||||
TimeAggregation: metrictypes.TimeAggregationAvg,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
// Query D: Memory allocatable. Same Latest caveat as Query B.
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
|
||||
Name: "D",
|
||||
Signal: telemetrytypes.SignalMetrics,
|
||||
Aggregations: []qbtypes.MetricAggregation{
|
||||
{
|
||||
MetricName: "k8s.node.allocatable_memory",
|
||||
TimeAggregation: metrictypes.TimeAggregationLatest,
|
||||
SpaceAggregation: metrictypes.SpaceAggregationSum,
|
||||
ReduceTo: qbtypes.ReduceToAvg,
|
||||
},
|
||||
},
|
||||
GroupBy: []qbtypes.GroupByKey{nodeNameGroupByKey},
|
||||
Disabled: false,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &qbtypes.QueryRangeRequest{
|
||||
RequestType: qbtypes.RequestTypeScalar,
|
||||
CompositeQuery: qbtypes.CompositeQuery{
|
||||
Queries: queries,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,7 @@ import (
|
||||
// buildPodRecords assembles the page records. Phase counts come from
|
||||
// phaseCounts in both modes. In list mode (isPodUIDInGroupBy=true) each
|
||||
// group is one pod, so exactly one count is 1; PodPhase is derived from
|
||||
// which one. In grouped_list mode PodPhase stays PodPhaseNone.
|
||||
// which one. In grouped_list mode PodPhase stays PodPhaseNoData.
|
||||
func buildPodRecords(
|
||||
isPodUIDInGroupBy bool,
|
||||
resp *qbtypes.QueryRangeResponse,
|
||||
@@ -38,7 +38,7 @@ func buildPodRecords(
|
||||
|
||||
record := inframonitoringtypes.PodRecord{ // initialize with default values
|
||||
PodUID: podUID,
|
||||
PodPhase: inframonitoringtypes.PodPhaseNone,
|
||||
PodPhase: inframonitoringtypes.PodPhaseNoData,
|
||||
PodCPU: -1,
|
||||
PodCPURequest: -1,
|
||||
PodCPULimit: -1,
|
||||
@@ -46,7 +46,7 @@ func buildPodRecords(
|
||||
PodMemoryRequest: -1,
|
||||
PodMemoryLimit: -1,
|
||||
PodAge: -1,
|
||||
Meta: map[string]any{},
|
||||
Meta: map[string]string{},
|
||||
}
|
||||
|
||||
if metrics, ok := metricsMap[compositeKey]; ok {
|
||||
@@ -71,11 +71,13 @@ func buildPodRecords(
|
||||
}
|
||||
|
||||
if phaseCountsForGroup, ok := phaseCounts[compositeKey]; ok {
|
||||
record.PendingPodCount = phaseCountsForGroup.Pending
|
||||
record.RunningPodCount = phaseCountsForGroup.Running
|
||||
record.SucceededPodCount = phaseCountsForGroup.Succeeded
|
||||
record.FailedPodCount = phaseCountsForGroup.Failed
|
||||
record.UnknownPodCount = phaseCountsForGroup.Unknown
|
||||
record.PodCountsByPhase = inframonitoringtypes.PodCountsByPhase{
|
||||
Pending: phaseCountsForGroup.Pending,
|
||||
Running: phaseCountsForGroup.Running,
|
||||
Succeeded: phaseCountsForGroup.Succeeded,
|
||||
Failed: phaseCountsForGroup.Failed,
|
||||
Unknown: phaseCountsForGroup.Unknown,
|
||||
}
|
||||
|
||||
// In list mode each group is one pod; the count==1 bucket identifies the phase.
|
||||
if isPodUIDInGroupBy {
|
||||
|
||||
@@ -11,9 +11,11 @@ import (
|
||||
type Handler interface {
|
||||
ListHosts(http.ResponseWriter, *http.Request)
|
||||
ListPods(http.ResponseWriter, *http.Request)
|
||||
ListNodes(http.ResponseWriter, *http.Request)
|
||||
}
|
||||
|
||||
type Module interface {
|
||||
ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error)
|
||||
ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error)
|
||||
ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error)
|
||||
}
|
||||
|
||||
@@ -1,40 +1,15 @@
|
||||
package querybuilder
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
errors "github.com/SigNoz/signoz/pkg/errors/v2"
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
grammar "github.com/SigNoz/signoz/pkg/parser/havingexpression/grammar"
|
||||
"github.com/antlr4-go/antlr/v4"
|
||||
"github.com/huandu/go-sqlbuilder"
|
||||
)
|
||||
|
||||
// HAVING-expression validator codes. All three are caller-fault, fix-the-input
|
||||
// errors — the user wrote an expression we cannot turn into SQL — so retry
|
||||
// is pointless until the expression itself changes.
|
||||
var (
|
||||
codeHavingStringLiteral = errors.Register("querybuilder.having.string_literal", errors.Meta{
|
||||
Category: errors.CategoryInvalidInput,
|
||||
Fault: errors.FaultCaller,
|
||||
Retry: errors.Retry{Policy: errors.RetryAfterFix},
|
||||
Remediation: errors.RemediationFixInput,
|
||||
})
|
||||
codeHavingInvalidReference = errors.Register("querybuilder.having.invalid_reference", errors.Meta{
|
||||
Category: errors.CategoryInvalidInput,
|
||||
Fault: errors.FaultCaller,
|
||||
Retry: errors.Retry{Policy: errors.RetryAfterFix},
|
||||
Remediation: errors.RemediationFixInput,
|
||||
})
|
||||
codeHavingSyntaxError = errors.Register("querybuilder.having.syntax_error", errors.Meta{
|
||||
Category: errors.CategoryInvalidInput,
|
||||
Fault: errors.FaultCaller,
|
||||
Retry: errors.Retry{Policy: errors.RetryAfterFix},
|
||||
Remediation: errors.RemediationFixInput,
|
||||
})
|
||||
)
|
||||
|
||||
// havingExpressionRewriteVisitor walks the parse tree of a HavingExpression in a single
|
||||
// pass, simultaneously rewriting user-facing references to their SQL column names and
|
||||
// collecting any references that could not be resolved.
|
||||
@@ -306,10 +281,10 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
|
||||
// This is checked before invalid references so that "contains string literals" takes
|
||||
// priority when a bare string literal is also an unresolvable operand.
|
||||
if v.hasStringLiteral {
|
||||
return "", errors.New(codeHavingStringLiteral,
|
||||
return "", errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"`Having` expression contains string literals",
|
||||
errors.WithDetail("Aggregator results are numeric"),
|
||||
)
|
||||
).WithAdditional("Aggregator results are numeric")
|
||||
}
|
||||
|
||||
if len(v.invalid) > 0 {
|
||||
@@ -319,10 +294,7 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
|
||||
validKeys = append(validKeys, k)
|
||||
}
|
||||
sort.Strings(validKeys)
|
||||
opts := []errors.Option{
|
||||
errors.WithAttr("invalid_refs", v.invalid),
|
||||
errors.WithAttr("valid_refs", validKeys),
|
||||
}
|
||||
additional := []string{"Valid references are: [" + strings.Join(validKeys, ", ") + "]"}
|
||||
if len(v.invalid) == 1 {
|
||||
inv := v.invalid[0]
|
||||
// Only suggest for plain identifier typos, not for unresolved function
|
||||
@@ -331,13 +303,15 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
|
||||
// a simple string substitution produce a corrupt expression.
|
||||
isFuncCall := strings.Contains(original, inv+"(")
|
||||
if match, dist := closestMatch(inv, validKeys); !isFuncCall && !strings.Contains(match, "(") && dist <= 3 {
|
||||
opts = append(opts, errors.WithAttr("suggestions", []string{strings.ReplaceAll(original, inv, match)}))
|
||||
corrected := strings.ReplaceAll(original, inv, match)
|
||||
additional = append(additional, "Suggestion: `"+corrected+"`")
|
||||
}
|
||||
}
|
||||
return "", errors.New(codeHavingInvalidReference,
|
||||
fmt.Sprintf("Invalid references in `Having` expression: [%s]", strings.Join(v.invalid, ", ")),
|
||||
opts...,
|
||||
)
|
||||
return "", errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"Invalid references in `Having` expression: [%s]",
|
||||
strings.Join(v.invalid, ", "),
|
||||
).WithAdditional(additional...)
|
||||
}
|
||||
|
||||
// Layer 3 – ANTLR syntax errors. We parse the original expression, so error messages
|
||||
@@ -354,20 +328,17 @@ func (r *HavingExpressionRewriter) rewriteAndValidate(expression string) (string
|
||||
if detail == "" {
|
||||
detail = "check the expression syntax"
|
||||
}
|
||||
opts := []errors.Option{
|
||||
errors.WithDetail(detail),
|
||||
errors.WithAttr("syntax_errors", msgs),
|
||||
}
|
||||
additional := []string{detail}
|
||||
// For single-error expressions, try to produce an actionable suggestion.
|
||||
if len(allSyntaxErrors) == 1 {
|
||||
if s := havingSuggestion(allSyntaxErrors[0], original); s != "" {
|
||||
opts = append(opts, errors.WithAttr("suggestions", []string{s}))
|
||||
additional = append(additional, "Suggestion: `"+s+"`")
|
||||
}
|
||||
}
|
||||
return "", errors.New(codeHavingSyntaxError,
|
||||
return "", errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"Syntax error in `Having` expression",
|
||||
opts...,
|
||||
)
|
||||
).WithAdditional(additional...)
|
||||
}
|
||||
|
||||
return result, nil
|
||||
|
||||
@@ -200,7 +200,7 @@ func (t *telemetryMetaStore) getTracesKeys(ctx context.Context, fieldKeySelector
|
||||
`CASE
|
||||
// WHEN tagType = 'spanfield' THEN 1
|
||||
WHEN tagType = 'resource' THEN 2
|
||||
// WHEN tagType = 'scope' THEN 3
|
||||
WHEN tagType = 'scope' THEN 3
|
||||
WHEN tagType = 'tag' THEN 4
|
||||
ELSE 5
|
||||
END as priority`,
|
||||
|
||||
@@ -51,6 +51,7 @@ var (
|
||||
ValueType: schema.ColumnTypeString,
|
||||
}},
|
||||
"resource": {Name: "resource", Type: schema.JSONColumnType{}},
|
||||
"scope": {Name: "scope", Type: schema.JSONColumnType{}},
|
||||
|
||||
"events": {Name: "events", Type: schema.ArrayColumnType{
|
||||
ElementType: schema.ColumnTypeString,
|
||||
@@ -176,7 +177,7 @@ func (m *defaultFieldMapper) getColumn(
|
||||
case telemetrytypes.FieldContextResource:
|
||||
return []*schema.Column{indexV3Columns["resource"]}, nil
|
||||
case telemetrytypes.FieldContextScope:
|
||||
return []*schema.Column{}, qbtypes.ErrColumnNotFound
|
||||
return []*schema.Column{indexV3Columns["scope"]}, nil
|
||||
case telemetrytypes.FieldContextAttribute:
|
||||
switch key.FieldDataType {
|
||||
case telemetrytypes.FieldDataTypeString:
|
||||
@@ -261,21 +262,29 @@ func (m *defaultFieldMapper) FieldFor(
|
||||
|
||||
switch column.Type.GetType() {
|
||||
case schema.ColumnTypeEnumJSON:
|
||||
// json is only supported for resource context as of now
|
||||
if key.FieldContext != telemetrytypes.FieldContextResource {
|
||||
return "", errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "only resource context fields are supported for json columns, got %s", key.FieldContext.String)
|
||||
}
|
||||
oldColumn := indexV3Columns["resources_string"]
|
||||
oldKeyName := fmt.Sprintf("%s['%s']", oldColumn.Name, key.Name)
|
||||
// have to add ::string as clickHouse throws an error :- data types Variant/Dynamic are not allowed in GROUP BY
|
||||
// once clickHouse dependency is updated, we need to check if we can remove it.
|
||||
if key.Materialized {
|
||||
oldKeyName = telemetrytypes.FieldKeyToMaterializedColumnName(key)
|
||||
oldKeyNameExists := telemetrytypes.FieldKeyToMaterializedColumnNameForExists(key)
|
||||
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, %s==true, %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldKeyNameExists, oldKeyName), nil
|
||||
} else {
|
||||
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, mapContains(%s, '%s'), %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldColumn.Name, key.Name, oldKeyName), nil
|
||||
switch key.FieldContext {
|
||||
case telemetrytypes.FieldContextScope:
|
||||
scopeKey, _ := strings.CutPrefix(key.Name, "scope.")
|
||||
switch scopeKey {
|
||||
case "name", "version":
|
||||
return fmt.Sprintf("%s.%s::String", column.Name, scopeKey), nil
|
||||
default:
|
||||
return fmt.Sprintf("%s.attributes.`%s`::String", column.Name, scopeKey), nil
|
||||
}
|
||||
case telemetrytypes.FieldContextResource:
|
||||
oldColumn := indexV3Columns["resources_string"]
|
||||
oldKeyName := fmt.Sprintf("%s['%s']", oldColumn.Name, key.Name)
|
||||
// have to add ::string as clickHouse throws an error :- data types Variant/Dynamic are not allowed in GROUP BY
|
||||
// once clickHouse dependency is updated, we need to check if we can remove it.
|
||||
if key.Materialized {
|
||||
oldKeyName = telemetrytypes.FieldKeyToMaterializedColumnName(key)
|
||||
oldKeyNameExists := telemetrytypes.FieldKeyToMaterializedColumnNameForExists(key)
|
||||
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, %s==true, %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldKeyNameExists, oldKeyName), nil
|
||||
} else {
|
||||
return fmt.Sprintf("multiIf(%s.`%s` IS NOT NULL, %s.`%s`::String, mapContains(%s, '%s'), %s, NULL)", column.Name, key.Name, column.Name, key.Name, oldColumn.Name, key.Name, oldKeyName), nil
|
||||
}
|
||||
}
|
||||
return "", errors.Newf(errors.TypeInvalidInput, errors.CodeInvalidInput, "json column type only supported for resource and scope context, got %s", key.FieldContext.String)
|
||||
case schema.ColumnTypeEnumString,
|
||||
schema.ColumnTypeEnumUInt64,
|
||||
schema.ColumnTypeEnumUInt32,
|
||||
|
||||
@@ -78,6 +78,51 @@ func TestGetFieldKeyName(t *testing.T) {
|
||||
expectedResult: "multiIf(resource.`deployment.environment` IS NOT NULL, resource.`deployment.environment`::String, `resource_string_deployment$$environment_exists`==true, `resource_string_deployment$$environment`, NULL)",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Scope field - name",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "name",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
expectedResult: "scope.name::String",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Scope field - scope.name",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "scope.name",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
expectedResult: "scope.name::String",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Scope field - version",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "version",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
expectedResult: "scope.version::String",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Scope field - scope.version",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "scope.version",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
expectedResult: "scope.version::String",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Scope field - custom attribute",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "custom.attr",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
expectedResult: "scope.attributes.`custom.attr`::String",
|
||||
expectedError: nil,
|
||||
},
|
||||
{
|
||||
name: "Non-existent column",
|
||||
key: telemetrytypes.TelemetryFieldKey{
|
||||
|
||||
@@ -350,6 +350,66 @@ func TestStatementBuilder(t *testing.T) {
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "scope.name filter and group by",
|
||||
requestType: qbtypes.RequestTypeTimeSeries,
|
||||
query: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
|
||||
Signal: telemetrytypes.SignalTraces,
|
||||
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
|
||||
Aggregations: []qbtypes.TraceAggregation{
|
||||
{
|
||||
Expression: "count()",
|
||||
},
|
||||
},
|
||||
Filter: &qbtypes.Filter{
|
||||
Expression: "scope.name = 'opentelemetry-io'",
|
||||
},
|
||||
Limit: 10,
|
||||
GroupBy: []qbtypes.GroupByKey{
|
||||
{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "scope.name",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: qbtypes.Statement{
|
||||
Query: "WITH __limit_cte AS (SELECT toString(multiIf(scope.name::String IS NOT NULL, scope.name::String, NULL)) AS `scope.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE (scope.name::String = ? AND scope.name::String IS NOT NULL) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `scope.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(scope.name::String IS NOT NULL, scope.name::String, NULL)) AS `scope.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE (scope.name::String = ? AND scope.name::String IS NOT NULL) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`scope.name`) GLOBAL IN (SELECT `scope.name` FROM __limit_cte) GROUP BY ts, `scope.name`",
|
||||
Args: []any{"opentelemetry-io", "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, "opentelemetry-io", "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "scope.version filter with scope.name group by",
|
||||
requestType: qbtypes.RequestTypeTimeSeries,
|
||||
query: qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]{
|
||||
Signal: telemetrytypes.SignalTraces,
|
||||
StepInterval: qbtypes.Step{Duration: 30 * time.Second},
|
||||
Aggregations: []qbtypes.TraceAggregation{
|
||||
{
|
||||
Expression: "count()",
|
||||
},
|
||||
},
|
||||
Filter: &qbtypes.Filter{
|
||||
Expression: "scope.version = '1.0.0'",
|
||||
},
|
||||
Limit: 10,
|
||||
GroupBy: []qbtypes.GroupByKey{
|
||||
{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "scope.name",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expected: qbtypes.Statement{
|
||||
Query: "WITH __limit_cte AS (SELECT toString(multiIf(scope.name::String IS NOT NULL, scope.name::String, NULL)) AS `scope.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE (scope.version::String = ? AND scope.version::String IS NOT NULL) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? GROUP BY `scope.name` ORDER BY __result_0 DESC LIMIT ?) SELECT toStartOfInterval(timestamp, INTERVAL 30 SECOND) AS ts, toString(multiIf(scope.name::String IS NOT NULL, scope.name::String, NULL)) AS `scope.name`, count() AS __result_0 FROM signoz_traces.distributed_signoz_index_v3 WHERE (scope.version::String = ? AND scope.version::String IS NOT NULL) AND timestamp >= ? AND timestamp < ? AND ts_bucket_start >= ? AND ts_bucket_start <= ? AND (`scope.name`) GLOBAL IN (SELECT `scope.name` FROM __limit_cte) GROUP BY ts, `scope.name`",
|
||||
Args: []any{"1.0.0", "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448), 10, "1.0.0", "1747947419000000000", "1747983448000000000", uint64(1747945619), uint64(1747983448)},
|
||||
},
|
||||
expectedErr: nil,
|
||||
},
|
||||
}
|
||||
|
||||
fm := NewFieldMapper()
|
||||
|
||||
@@ -111,6 +111,20 @@ func buildCompleteFieldKeyMap() map[string][]*telemetrytypes.TelemetryFieldKey {
|
||||
FieldDataType: telemetrytypes.FieldDataTypeBool,
|
||||
},
|
||||
},
|
||||
"scope.name": {
|
||||
{
|
||||
Name: "scope.name",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
FieldDataType: telemetrytypes.FieldDataTypeString,
|
||||
},
|
||||
},
|
||||
"scope.version": {
|
||||
{
|
||||
Name: "scope.version",
|
||||
FieldContext: telemetrytypes.FieldContextScope,
|
||||
FieldDataType: telemetrytypes.FieldDataTypeString,
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, keys := range keysMap {
|
||||
for _, key := range keys {
|
||||
|
||||
@@ -18,16 +18,16 @@ type Hosts struct {
|
||||
}
|
||||
|
||||
type HostRecord struct {
|
||||
HostName string `json:"hostName" required:"true"`
|
||||
Status HostStatus `json:"status" required:"true"`
|
||||
ActiveHostCount int `json:"activeHostCount" required:"true"`
|
||||
InactiveHostCount int `json:"inactiveHostCount" required:"true"`
|
||||
CPU float64 `json:"cpu" required:"true"`
|
||||
Memory float64 `json:"memory" required:"true"`
|
||||
Wait float64 `json:"wait" required:"true"`
|
||||
Load15 float64 `json:"load15" required:"true"`
|
||||
DiskUsage float64 `json:"diskUsage" required:"true"`
|
||||
Meta map[string]interface{} `json:"meta" required:"true"`
|
||||
HostName string `json:"hostName" required:"true"`
|
||||
Status HostStatus `json:"status" required:"true"`
|
||||
ActiveHostCount int `json:"activeHostCount" required:"true"`
|
||||
InactiveHostCount int `json:"inactiveHostCount" required:"true"`
|
||||
CPU float64 `json:"cpu" required:"true"`
|
||||
Memory float64 `json:"memory" required:"true"`
|
||||
Wait float64 `json:"wait" required:"true"`
|
||||
Load15 float64 `json:"load15" required:"true"`
|
||||
DiskUsage float64 `json:"diskUsage" required:"true"`
|
||||
Meta map[string]string `json:"meta" required:"true"`
|
||||
}
|
||||
|
||||
type RequiredMetricsCheck struct {
|
||||
|
||||
110
pkg/types/inframonitoringtypes/nodes.go
Normal file
110
pkg/types/inframonitoringtypes/nodes.go
Normal file
@@ -0,0 +1,110 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"slices"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
)
|
||||
|
||||
type Nodes struct {
|
||||
Type ResponseType `json:"type" required:"true"`
|
||||
Records []NodeRecord `json:"records" required:"true"`
|
||||
Total int `json:"total" required:"true"`
|
||||
RequiredMetricsCheck RequiredMetricsCheck `json:"requiredMetricsCheck" required:"true"`
|
||||
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention" required:"true"`
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
// NodeCountsByReadiness buckets node counts by their latest k8s.node.condition_ready
|
||||
// value in the time window. Reusable across record types (node / cluster).
|
||||
type NodeCountsByReadiness struct {
|
||||
Ready int `json:"ready" required:"true"`
|
||||
NotReady int `json:"notReady" required:"true"`
|
||||
}
|
||||
|
||||
type NodeRecord struct {
|
||||
NodeName string `json:"nodeName" required:"true"`
|
||||
Condition NodeCondition `json:"condition" required:"true"`
|
||||
NodeCountsByReadiness NodeCountsByReadiness `json:"nodeCountsByReadiness" required:"true"`
|
||||
PodCountsByPhase PodCountsByPhase `json:"podCountsByPhase" required:"true"`
|
||||
NodeCPU float64 `json:"nodeCPU" required:"true"`
|
||||
NodeCPUAllocatable float64 `json:"nodeCPUAllocatable" required:"true"`
|
||||
NodeMemory float64 `json:"nodeMemory" required:"true"`
|
||||
NodeMemoryAllocatable float64 `json:"nodeMemoryAllocatable" required:"true"`
|
||||
Meta map[string]string `json:"meta" required:"true"`
|
||||
}
|
||||
|
||||
// PostableNodes is the request body for the v2 nodes list API.
|
||||
type PostableNodes struct {
|
||||
Start int64 `json:"start" required:"true"`
|
||||
End int64 `json:"end" required:"true"`
|
||||
Filter *qbtypes.Filter `json:"filter"`
|
||||
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
|
||||
OrderBy *qbtypes.OrderBy `json:"orderBy"`
|
||||
Offset int `json:"offset"`
|
||||
Limit int `json:"limit" required:"true"`
|
||||
}
|
||||
|
||||
// Validate ensures PostableNodes contains acceptable values.
|
||||
func (req *PostableNodes) Validate() error {
|
||||
if req == nil {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
|
||||
}
|
||||
|
||||
if req.Start <= 0 {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid start time %d: start must be greater than 0",
|
||||
req.Start,
|
||||
)
|
||||
}
|
||||
|
||||
if req.End <= 0 {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid end time %d: end must be greater than 0",
|
||||
req.End,
|
||||
)
|
||||
}
|
||||
|
||||
if req.Start >= req.End {
|
||||
return errors.NewInvalidInputf(
|
||||
errors.CodeInvalidInput,
|
||||
"invalid time range: start (%d) must be less than end (%d)",
|
||||
req.Start,
|
||||
req.End,
|
||||
)
|
||||
}
|
||||
|
||||
if req.Limit < 1 || req.Limit > 5000 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
|
||||
}
|
||||
|
||||
if req.Offset < 0 {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
|
||||
}
|
||||
|
||||
if req.OrderBy != nil {
|
||||
if !slices.Contains(NodesValidOrderByKeys, req.OrderBy.Key.Name) {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", req.OrderBy.Key.Name)
|
||||
}
|
||||
if req.OrderBy.Direction != qbtypes.OrderDirectionAsc && req.OrderBy.Direction != qbtypes.OrderDirectionDesc {
|
||||
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", req.OrderBy.Direction)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnmarshalJSON validates input immediately after decoding.
|
||||
func (req *PostableNodes) UnmarshalJSON(data []byte) error {
|
||||
type raw PostableNodes
|
||||
var decoded raw
|
||||
if err := json.Unmarshal(data, &decoded); err != nil {
|
||||
return err
|
||||
}
|
||||
*req = PostableNodes(decoded)
|
||||
return req.Validate()
|
||||
}
|
||||
42
pkg/types/inframonitoringtypes/nodes_constants.go
Normal file
42
pkg/types/inframonitoringtypes/nodes_constants.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import "github.com/SigNoz/signoz/pkg/valuer"
|
||||
|
||||
type NodeCondition struct {
|
||||
valuer.String
|
||||
}
|
||||
|
||||
var (
|
||||
NodeConditionReady = NodeCondition{valuer.NewString("ready")}
|
||||
NodeConditionNotReady = NodeCondition{valuer.NewString("not_ready")}
|
||||
NodeConditionNoData = NodeCondition{valuer.NewString("no_data")}
|
||||
)
|
||||
|
||||
func (NodeCondition) Enum() []any {
|
||||
return []any{
|
||||
NodeConditionReady,
|
||||
NodeConditionNotReady,
|
||||
NodeConditionNoData,
|
||||
}
|
||||
}
|
||||
|
||||
// Numeric values emitted by the k8s.node.condition_ready metric
|
||||
// (source: OTel kubeletstats receiver).
|
||||
const (
|
||||
NodeConditionNumReady = 1
|
||||
NodeConditionNumNotReady = 0
|
||||
)
|
||||
|
||||
const (
|
||||
NodesOrderByCPU = "cpu"
|
||||
NodesOrderByCPUAllocatable = "cpu_allocatable"
|
||||
NodesOrderByMemory = "memory"
|
||||
NodesOrderByMemoryAllocatable = "memory_allocatable"
|
||||
)
|
||||
|
||||
var NodesValidOrderByKeys = []string{
|
||||
NodesOrderByCPU,
|
||||
NodesOrderByCPUAllocatable,
|
||||
NodesOrderByMemory,
|
||||
NodesOrderByMemoryAllocatable,
|
||||
}
|
||||
255
pkg/types/inframonitoringtypes/nodes_test.go
Normal file
255
pkg/types/inframonitoringtypes/nodes_test.go
Normal file
@@ -0,0 +1,255 @@
|
||||
package inframonitoringtypes
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/errors"
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
||||
"github.com/SigNoz/signoz/pkg/valuer"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestPostableNodes_Validate(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
req *PostableNodes
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid request",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "nil request",
|
||||
req: nil,
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time zero",
|
||||
req: &PostableNodes{
|
||||
Start: 0,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time negative",
|
||||
req: &PostableNodes{
|
||||
Start: -1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "end time zero",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 0,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time greater than end time",
|
||||
req: &PostableNodes{
|
||||
Start: 2000,
|
||||
End: 1000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "start time equal to end time",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 1000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit zero",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 0,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit negative",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: -10,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "limit exceeds max",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 5001,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "offset negative",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: -5,
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy nil is valid",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key cpu and direction asc",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: NodesOrderByCPU,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionAsc,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key cpu_allocatable and direction desc",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: NodesOrderByCPUAllocatable,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key memory_allocatable and direction asc",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: NodesOrderByMemoryAllocatable,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionAsc,
|
||||
},
|
||||
},
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "orderBy with condition key is rejected",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "condition",
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy with invalid key",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: "unknown",
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "orderBy with valid key but invalid direction",
|
||||
req: &PostableNodes{
|
||||
Start: 1000,
|
||||
End: 2000,
|
||||
Limit: 100,
|
||||
Offset: 0,
|
||||
OrderBy: &qbtypes.OrderBy{
|
||||
Key: qbtypes.OrderByKey{
|
||||
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
||||
Name: NodesOrderByMemory,
|
||||
},
|
||||
},
|
||||
Direction: qbtypes.OrderDirection{String: valuer.NewString("invalid")},
|
||||
},
|
||||
},
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
err := tt.req.Validate()
|
||||
if tt.wantErr {
|
||||
require.Error(t, err)
|
||||
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -17,22 +17,28 @@ type Pods struct {
|
||||
Warning *qbtypes.QueryWarnData `json:"warning,omitempty"`
|
||||
}
|
||||
|
||||
// PodCountsByPhase buckets pod counts by their latest phase in the time window.
|
||||
// Reusable across record types (pod / namespace / cluster).
|
||||
type PodCountsByPhase struct {
|
||||
Pending int `json:"pending" required:"true"`
|
||||
Running int `json:"running" required:"true"`
|
||||
Succeeded int `json:"succeeded" required:"true"`
|
||||
Failed int `json:"failed" required:"true"`
|
||||
Unknown int `json:"unknown" required:"true"`
|
||||
}
|
||||
|
||||
type PodRecord struct {
|
||||
PodUID string `json:"podUID" required:"true"`
|
||||
PodCPU float64 `json:"podCPU" required:"true"`
|
||||
PodCPURequest float64 `json:"podCPURequest" required:"true"`
|
||||
PodCPULimit float64 `json:"podCPULimit" required:"true"`
|
||||
PodMemory float64 `json:"podMemory" required:"true"`
|
||||
PodMemoryRequest float64 `json:"podMemoryRequest" required:"true"`
|
||||
PodMemoryLimit float64 `json:"podMemoryLimit" required:"true"`
|
||||
PodPhase PodPhase `json:"podPhase" required:"true"`
|
||||
PendingPodCount int `json:"pendingPodCount" required:"true"`
|
||||
RunningPodCount int `json:"runningPodCount" required:"true"`
|
||||
SucceededPodCount int `json:"succeededPodCount" required:"true"`
|
||||
FailedPodCount int `json:"failedPodCount" required:"true"`
|
||||
UnknownPodCount int `json:"unknownPodCount" required:"true"`
|
||||
PodAge int64 `json:"podAge" required:"true"`
|
||||
Meta map[string]interface{} `json:"meta" required:"true"`
|
||||
PodUID string `json:"podUID" required:"true"`
|
||||
PodCPU float64 `json:"podCPU" required:"true"`
|
||||
PodCPURequest float64 `json:"podCPURequest" required:"true"`
|
||||
PodCPULimit float64 `json:"podCPULimit" required:"true"`
|
||||
PodMemory float64 `json:"podMemory" required:"true"`
|
||||
PodMemoryRequest float64 `json:"podMemoryRequest" required:"true"`
|
||||
PodMemoryLimit float64 `json:"podMemoryLimit" required:"true"`
|
||||
PodPhase PodPhase `json:"podPhase" required:"true"`
|
||||
PodCountsByPhase PodCountsByPhase `json:"podCountsByPhase" required:"true"`
|
||||
PodAge int64 `json:"podAge" required:"true"`
|
||||
Meta map[string]string `json:"meta" required:"true"`
|
||||
}
|
||||
|
||||
// PostablePods is the request body for the v2 pods list API.
|
||||
|
||||
@@ -12,7 +12,7 @@ var (
|
||||
PodPhaseSucceeded = PodPhase{valuer.NewString("succeeded")}
|
||||
PodPhaseFailed = PodPhase{valuer.NewString("failed")}
|
||||
PodPhaseUnknown = PodPhase{valuer.NewString("unknown")}
|
||||
PodPhaseNone = PodPhase{valuer.NewString("")}
|
||||
PodPhaseNoData = PodPhase{valuer.NewString("no_data")}
|
||||
)
|
||||
|
||||
func (PodPhase) Enum() []any {
|
||||
@@ -22,7 +22,7 @@ func (PodPhase) Enum() []any {
|
||||
PodPhaseSucceeded,
|
||||
PodPhaseFailed,
|
||||
PodPhaseUnknown,
|
||||
PodPhaseNone,
|
||||
PodPhaseNoData,
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user