Compare commits

..

1 Commits

Author SHA1 Message Date
nityanandagohain
4744f83cfe fix(querier): pad clamped time range for trace_id-filtered logs 2026-06-21 11:19:42 +05:30
25 changed files with 113 additions and 2107 deletions

View File

@@ -141,6 +141,10 @@ querier:
flux_interval: 5m
# The maximum number of concurrent queries for missing ranges.
max_concurrent_queries: 4
# When filtering logs by trace_id, clamp the query window to the trace time
# range with padding to include slightly delayed log exports. Logs only; set
# to 0 to disable.
log_trace_id_window_padding: 5m
##################### TelemetryStore #####################
telemetrystore:

View File

@@ -3895,29 +3895,6 @@ components:
enabled:
type: boolean
type: object
InframonitoringtypesAssociatedComponent:
properties:
name:
type: string
type:
$ref: '#/components/schemas/InframonitoringtypesOnboardingComponentType'
required:
- type
- name
type: object
InframonitoringtypesAttributesComponentEntry:
properties:
associatedComponent:
$ref: '#/components/schemas/InframonitoringtypesAssociatedComponent'
attributes:
items:
type: string
nullable: true
type: array
required:
- attributes
- associatedComponent
type: object
InframonitoringtypesClusterRecord:
properties:
clusterCPU:
@@ -4267,57 +4244,6 @@ components:
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesMetricsComponentEntry:
properties:
associatedComponent:
$ref: '#/components/schemas/InframonitoringtypesAssociatedComponent'
metrics:
items:
type: string
nullable: true
type: array
required:
- metrics
- associatedComponent
type: object
InframonitoringtypesMissingAttributesComponentEntry:
properties:
associatedComponent:
$ref: '#/components/schemas/InframonitoringtypesAssociatedComponent'
attributes:
items:
type: string
nullable: true
type: array
documentationLink:
type: string
message:
type: string
required:
- attributes
- associatedComponent
- message
- documentationLink
type: object
InframonitoringtypesMissingMetricsComponentEntry:
properties:
associatedComponent:
$ref: '#/components/schemas/InframonitoringtypesAssociatedComponent'
documentationLink:
type: string
message:
type: string
metrics:
items:
type: string
nullable: true
type: array
required:
- metrics
- associatedComponent
- message
- documentationLink
type: object
InframonitoringtypesNamespaceRecord:
properties:
meta:
@@ -4442,71 +4368,6 @@ components:
- requiredMetricsCheck
- endTimeBeforeRetention
type: object
InframonitoringtypesOnboarding:
properties:
missingDefaultEnabledMetrics:
items:
$ref: '#/components/schemas/InframonitoringtypesMissingMetricsComponentEntry'
nullable: true
type: array
missingOptionalMetrics:
items:
$ref: '#/components/schemas/InframonitoringtypesMissingMetricsComponentEntry'
nullable: true
type: array
missingRequiredAttributes:
items:
$ref: '#/components/schemas/InframonitoringtypesMissingAttributesComponentEntry'
nullable: true
type: array
presentDefaultEnabledMetrics:
items:
$ref: '#/components/schemas/InframonitoringtypesMetricsComponentEntry'
nullable: true
type: array
presentOptionalMetrics:
items:
$ref: '#/components/schemas/InframonitoringtypesMetricsComponentEntry'
nullable: true
type: array
presentRequiredAttributes:
items:
$ref: '#/components/schemas/InframonitoringtypesAttributesComponentEntry'
nullable: true
type: array
ready:
type: boolean
type:
$ref: '#/components/schemas/InframonitoringtypesOnboardingType'
required:
- type
- ready
- presentDefaultEnabledMetrics
- presentOptionalMetrics
- presentRequiredAttributes
- missingDefaultEnabledMetrics
- missingOptionalMetrics
- missingRequiredAttributes
type: object
InframonitoringtypesOnboardingComponentType:
enum:
- receiver
- processor
type: string
InframonitoringtypesOnboardingType:
enum:
- hosts
- processes
- pods
- nodes
- deployments
- daemonsets
- statefulsets
- jobs
- namespaces
- clusters
- volumes
type: string
InframonitoringtypesPodCountsByPhase:
properties:
failed:
@@ -15328,72 +15189,6 @@ paths:
summary: List Nodes for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/onboarding:
get:
deprecated: false
description: 'Returns the per-tab readiness of the infra-monitoring section
selected by the ''type'' query parameter (hosts, processes, pods, nodes, deployments,
daemonsets, statefulsets, jobs, namespaces, clusters, volumes). For each collector
receiver or processor that contributes required metrics or attributes, lists
what is present and what is missing, with a prebuilt user-facing message and
a docs link per missing component. Default-enabled metrics are those expected
as soon as the receiver is configured; optional metrics require ''enabled:
true'' in receiver config. ''ready'' is true only when every missing list
is empty.'
operationId: GetOnboarding
parameters:
- in: query
name: type
required: true
schema:
$ref: '#/components/schemas/InframonitoringtypesOnboardingType'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/InframonitoringtypesOnboarding'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: Get Onboarding Status for Infra Monitoring
tags:
- inframonitoring
/api/v2/infra_monitoring/pods:
post:
deprecated: false

View File

@@ -4,22 +4,14 @@
* * regenerate with 'pnpm generate:api'
* SigNoz
*/
import { useMutation, useQuery } from 'react-query';
import { useMutation } from 'react-query';
import type {
InvalidateOptions,
MutationFunction,
QueryClient,
QueryFunction,
QueryKey,
UseMutationOptions,
UseMutationResult,
UseQueryOptions,
UseQueryResult,
} from 'react-query';
import type {
GetOnboarding200,
GetOnboardingParams,
InframonitoringtypesPostableClustersDTO,
InframonitoringtypesPostableDaemonSetsDTO,
InframonitoringtypesPostableDeploymentsDTO,
@@ -627,104 +619,6 @@ export const useListNodes = <
> => {
return useMutation(getListNodesMutationOptions(options));
};
/**
* Returns the per-tab readiness of the infra-monitoring section selected by the 'type' query parameter (hosts, processes, pods, nodes, deployments, daemonsets, statefulsets, jobs, namespaces, clusters, volumes). For each collector receiver or processor that contributes required metrics or attributes, lists what is present and what is missing, with a prebuilt user-facing message and a docs link per missing component. Default-enabled metrics are those expected as soon as the receiver is configured; optional metrics require 'enabled: true' in receiver config. 'ready' is true only when every missing list is empty.
* @summary Get Onboarding Status for Infra Monitoring
*/
export const getOnboarding = (
params: GetOnboardingParams,
signal?: AbortSignal,
) => {
return GeneratedAPIInstance<GetOnboarding200>({
url: `/api/v2/infra_monitoring/onboarding`,
method: 'GET',
params,
signal,
});
};
export const getGetOnboardingQueryKey = (params?: GetOnboardingParams) => {
return [
`/api/v2/infra_monitoring/onboarding`,
...(params ? [params] : []),
] as const;
};
export const getGetOnboardingQueryOptions = <
TData = Awaited<ReturnType<typeof getOnboarding>>,
TError = ErrorType<RenderErrorResponseDTO>,
>(
params: GetOnboardingParams,
options?: {
query?: UseQueryOptions<
Awaited<ReturnType<typeof getOnboarding>>,
TError,
TData
>;
},
) => {
const { query: queryOptions } = options ?? {};
const queryKey = queryOptions?.queryKey ?? getGetOnboardingQueryKey(params);
const queryFn: QueryFunction<Awaited<ReturnType<typeof getOnboarding>>> = ({
signal,
}) => getOnboarding(params, signal);
return { queryKey, queryFn, ...queryOptions } as UseQueryOptions<
Awaited<ReturnType<typeof getOnboarding>>,
TError,
TData
> & { queryKey: QueryKey };
};
export type GetOnboardingQueryResult = NonNullable<
Awaited<ReturnType<typeof getOnboarding>>
>;
export type GetOnboardingQueryError = ErrorType<RenderErrorResponseDTO>;
/**
* @summary Get Onboarding Status for Infra Monitoring
*/
export function useGetOnboarding<
TData = Awaited<ReturnType<typeof getOnboarding>>,
TError = ErrorType<RenderErrorResponseDTO>,
>(
params: GetOnboardingParams,
options?: {
query?: UseQueryOptions<
Awaited<ReturnType<typeof getOnboarding>>,
TError,
TData
>;
},
): UseQueryResult<TData, TError> & { queryKey: QueryKey } {
const queryOptions = getGetOnboardingQueryOptions(params, options);
const query = useQuery(queryOptions) as UseQueryResult<TData, TError> & {
queryKey: QueryKey;
};
return { ...query, queryKey: queryOptions.queryKey };
}
/**
* @summary Get Onboarding Status for Infra Monitoring
*/
export const invalidateGetOnboarding = async (
queryClient: QueryClient,
params: GetOnboardingParams,
options?: InvalidateOptions,
): Promise<QueryClient> => {
await queryClient.invalidateQueries(
{ queryKey: getGetOnboardingQueryKey(params) },
options,
);
return queryClient;
};
/**
* Returns a paginated list of Kubernetes pods with key metrics: CPU usage, CPU request/limit utilization, memory working set, memory request/limit utilization, current pod phase (pending/running/succeeded/failed/unknown/no_data), and pod age (ms since start time). Each pod includes metadata attributes (namespace, node, workload owner such as deployment/statefulset/daemonset/job/cronjob, cluster). Supports filtering via a filter expression, custom groupBy to aggregate pods by any attribute, ordering by any of the six metrics (cpu, cpu_request, cpu_limit, memory, memory_request, memory_limit), and pagination via offset/limit. The response type is 'list' for the default k8s.pod.uid grouping (each row is one pod with its current phase) or 'grouped_list' for custom groupBy keys (each row aggregates pods in the group with per-phase counts under podCountsByPhase: { pending, running, succeeded, failed, unknown } derived from each pod's latest phase in the window). Also reports missing required metrics and whether the requested time range falls before the data retention boundary. Numeric metric fields (podCPU, podCPURequest, podCPULimit, podMemory, podMemoryRequest, podMemoryLimit, podAge) return -1 as a sentinel when no data is available for that field.
* @summary List Pods for Infra Monitoring

View File

@@ -5346,26 +5346,6 @@ export interface GlobaltypesConfigDTO {
mcp_url: string | null;
}
export enum InframonitoringtypesOnboardingComponentTypeDTO {
receiver = 'receiver',
processor = 'processor',
}
export interface InframonitoringtypesAssociatedComponentDTO {
/**
* @type string
*/
name: string;
type: InframonitoringtypesOnboardingComponentTypeDTO;
}
export interface InframonitoringtypesAttributesComponentEntryDTO {
associatedComponent: InframonitoringtypesAssociatedComponentDTO;
/**
* @type array,null
*/
attributes: string[] | null;
}
export type InframonitoringtypesClusterRecordDTOMetaAnyOf = {
[key: string]: string;
};
@@ -5822,46 +5802,6 @@ export interface InframonitoringtypesJobsDTO {
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export interface InframonitoringtypesMetricsComponentEntryDTO {
associatedComponent: InframonitoringtypesAssociatedComponentDTO;
/**
* @type array,null
*/
metrics: string[] | null;
}
export interface InframonitoringtypesMissingAttributesComponentEntryDTO {
associatedComponent: InframonitoringtypesAssociatedComponentDTO;
/**
* @type array,null
*/
attributes: string[] | null;
/**
* @type string
*/
documentationLink: string;
/**
* @type string
*/
message: string;
}
export interface InframonitoringtypesMissingMetricsComponentEntryDTO {
associatedComponent: InframonitoringtypesAssociatedComponentDTO;
/**
* @type string
*/
documentationLink: string;
/**
* @type string
*/
message: string;
/**
* @type array,null
*/
metrics: string[] | null;
}
export type InframonitoringtypesNamespaceRecordDTOMetaAnyOf = {
[key: string]: string;
};
@@ -5979,61 +5919,6 @@ export interface InframonitoringtypesNodesDTO {
warning?: Querybuildertypesv5QueryWarnDataDTO;
}
export enum InframonitoringtypesOnboardingTypeDTO {
hosts = 'hosts',
processes = 'processes',
pods = 'pods',
nodes = 'nodes',
deployments = 'deployments',
daemonsets = 'daemonsets',
statefulsets = 'statefulsets',
jobs = 'jobs',
namespaces = 'namespaces',
clusters = 'clusters',
volumes = 'volumes',
}
export interface InframonitoringtypesOnboardingDTO {
/**
* @type array,null
*/
missingDefaultEnabledMetrics:
| InframonitoringtypesMissingMetricsComponentEntryDTO[]
| null;
/**
* @type array,null
*/
missingOptionalMetrics:
| InframonitoringtypesMissingMetricsComponentEntryDTO[]
| null;
/**
* @type array,null
*/
missingRequiredAttributes:
| InframonitoringtypesMissingAttributesComponentEntryDTO[]
| null;
/**
* @type array,null
*/
presentDefaultEnabledMetrics:
| InframonitoringtypesMetricsComponentEntryDTO[]
| null;
/**
* @type array,null
*/
presentOptionalMetrics: InframonitoringtypesMetricsComponentEntryDTO[] | null;
/**
* @type array,null
*/
presentRequiredAttributes:
| InframonitoringtypesAttributesComponentEntryDTO[]
| null;
/**
* @type boolean
*/
ready: boolean;
type: InframonitoringtypesOnboardingTypeDTO;
}
export enum InframonitoringtypesPodPhaseDTO {
pending = 'pending',
running = 'running',
@@ -10292,21 +10177,6 @@ export type ListNodes200 = {
status: string;
};
export type GetOnboardingParams = {
/**
* @description undefined
*/
type: InframonitoringtypesOnboardingTypeDTO;
};
export type GetOnboarding200 = {
data: InframonitoringtypesOnboardingDTO;
/**
* @type string
*/
status: string;
};
export type ListPods200 = {
data: InframonitoringtypesPodsDTO;
/**

View File

@@ -200,23 +200,5 @@ func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
return err
}
if err := router.Handle("/api/v2/infra_monitoring/onboarding", handler.New(
provider.authzMiddleware.ViewAccess(provider.infraMonitoringHandler.GetOnboarding),
handler.OpenAPIDef{
ID: "GetOnboarding",
Tags: []string{"inframonitoring"},
Summary: "Get Onboarding Status for Infra Monitoring",
Description: "Returns the per-tab readiness of the infra-monitoring section selected by the 'type' query parameter (hosts, processes, pods, nodes, deployments, daemonsets, statefulsets, jobs, namespaces, clusters, volumes). For each collector receiver or processor that contributes required metrics or attributes, lists what is present and what is missing, with a prebuilt user-facing message and a docs link per missing component. Default-enabled metrics are those expected as soon as the receiver is configured; optional metrics require 'enabled: true' in receiver config. 'ready' is true only when every missing list is empty.",
RequestQuery: new(inframonitoringtypes.PostableOnboarding),
Response: new(inframonitoringtypes.Onboarding),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized},
Deprecated: false,
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodGet).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -22,30 +22,6 @@ func NewHandler(m inframonitoring.Module) inframonitoring.Handler {
}
}
func (h *handler) GetOnboarding(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {
render.Error(rw, err)
return
}
orgID := valuer.MustNewUUID(claims.OrgID)
var parsedReq inframonitoringtypes.PostableOnboarding
if err := binding.Query.BindQuery(req.URL.Query(), &parsedReq); err != nil {
render.Error(rw, err)
return
}
result, err := h.module.GetOnboarding(req.Context(), orgID, &parsedReq)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}
func (h *handler) ListHosts(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {

View File

@@ -473,96 +473,6 @@ func (m *module) getMetricsExistenceAndEarliestTime(ctx context.Context, metricN
return missingMetrics, globalMinFirstReported, nil
}
// getMetricsExistence returns, for each requested metric name, whether it has ever
// been reported (present in signoz_metrics.distributed_metadata). No time window.
func (m *module) getMetricsExistence(ctx context.Context, metricNames []string) (map[string]bool, error) {
present := make(map[string]bool, len(metricNames))
for _, n := range metricNames {
present[n] = false
}
if len(metricNames) == 0 {
return present, nil
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select("metric_name", "count(*) AS cnt")
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
sb.Where(sb.In("metric_name", sqlbuilder.List(metricNames)))
sb.GroupBy("metric_name")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var name string
var cnt uint64
if err := rows.Scan(&name, &cnt); err != nil {
return nil, err
}
if cnt > 0 {
present[name] = true
}
}
if err := rows.Err(); err != nil {
return nil, err
}
return present, nil
}
// getAttributesExistence returns, for each requested attrName, whether it has ever
// been reported as a label on any of the given metricNames. Presence is checked
// against distributed_metadata without a time-range filter.
func (m *module) getAttributesExistence(ctx context.Context, metricNames, attrNames []string) (map[string]bool, error) {
present := make(map[string]bool, len(attrNames))
for _, a := range attrNames {
present[a] = false
}
if len(attrNames) == 0 {
return present, nil
}
if len(metricNames) == 0 {
return nil, errors.NewInternalf(errors.CodeInternal, "getAttributesExistence: metricNames must not be empty")
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select("attr_name", "count(*) AS cnt")
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
sb.Where(
sb.In("metric_name", sqlbuilder.List(metricNames)),
sb.In("attr_name", sqlbuilder.List(attrNames)),
)
sb.GroupBy("attr_name")
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
for rows.Next() {
var name string
var cnt uint64
if err := rows.Scan(&name, &cnt); err != nil {
return nil, err
}
if name != "" && cnt > 0 {
present[name] = true
}
}
if err := rows.Err(); err != nil {
return nil, err
}
return present, nil
}
// getMetadata fetches the latest values of additionalCols for each unique combination of groupBy keys,
// within the given time range and metric names. It uses argMax(tuple(...), unix_milli) to ensure
// we always pick attribute values from the latest timestamp for each group.

View File

@@ -1,7 +1,5 @@
package implinframonitoring
import "github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
// The types in this file are only used within the implinframonitoring package, and are not exposed outside.
// They are primarily used for internal processing and structuring of data within the module's implementation.
@@ -31,50 +29,3 @@ type nodeConditionCounts struct {
Ready int
NotReady int
}
// bucketSplit carries the up-to-six entries a single spec bucket contributes
// to an onboarding response. Any field may be nil if the bucket doesn't
// populate that dimension.
type bucketSplit struct {
PresentDefault *inframonitoringtypes.MetricsComponentEntry
PresentOptional *inframonitoringtypes.MetricsComponentEntry
PresentAttrs *inframonitoringtypes.AttributesComponentEntry
MissingDefault *inframonitoringtypes.MissingMetricsComponentEntry
MissingOptional *inframonitoringtypes.MissingMetricsComponentEntry
MissingAttrs *inframonitoringtypes.MissingAttributesComponentEntry
}
// onboardingComponentBucket is a single collector component's contribution
// toward a single infra-monitoring tab's readiness. Any of the three dimension
// slices (DefaultMetrics, OptionalMetrics, RequiredAttrs) may be empty — the
// bucketizer in Phase 4 skips empty dimensions.
type onboardingComponentBucket struct {
Component inframonitoringtypes.AssociatedComponent
DefaultMetrics []string
OptionalMetrics []string
RequiredAttrs []string
DocumentationLink string
}
// onboardingSpec defines, for one OnboardingType, the full set of
// component-scoped buckets that must be satisfied for the tab to be ready.
type onboardingSpec struct {
Buckets []onboardingComponentBucket
}
func (s onboardingSpec) getAllMetrics() []string {
var out []string
for _, b := range s.Buckets {
out = append(out, b.DefaultMetrics...)
out = append(out, b.OptionalMetrics...)
}
return out
}
func (s onboardingSpec) getAllAttrs() []string {
var out []string
for _, b := range s.Buckets {
out = append(out, b.RequiredAttrs...)
}
return out
}

View File

@@ -49,84 +49,6 @@ func NewModule(
}
}
// GetOnboarding runs a per-type readiness check: for the requested
// infra-monitoring tab, reports which required metrics and attributes are
// present vs missing, grouped by the collector component that produces them.
// Ready is true iff every missing list is empty.
func (m *module) GetOnboarding(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableOnboarding) (*inframonitoringtypes.Onboarding, error) {
if err := req.Validate(); err != nil {
return nil, err
}
spec, err := getSpecForType(req.Type)
if err != nil {
return nil, err
}
allMetrics := spec.getAllMetrics()
allAttrs := spec.getAllAttrs()
presentMetrics, err := m.getMetricsExistence(ctx, allMetrics)
if err != nil {
return nil, err
}
missingMetricsMap := make(map[string]bool, len(allMetrics))
for _, name := range allMetrics {
if !presentMetrics[name] {
missingMetricsMap[name] = true
}
}
presentAttrs, err := m.getAttributesExistence(ctx, allMetrics, allAttrs)
if err != nil {
return nil, err
}
missingAttrsMap := make(map[string]bool, len(allAttrs))
for _, name := range allAttrs {
if !presentAttrs[name] {
missingAttrsMap[name] = true
}
}
resp := &inframonitoringtypes.Onboarding{
Type: req.Type,
PresentDefaultEnabledMetrics: []inframonitoringtypes.MetricsComponentEntry{},
PresentOptionalMetrics: []inframonitoringtypes.MetricsComponentEntry{},
PresentRequiredAttributes: []inframonitoringtypes.AttributesComponentEntry{},
MissingDefaultEnabledMetrics: []inframonitoringtypes.MissingMetricsComponentEntry{},
MissingOptionalMetrics: []inframonitoringtypes.MissingMetricsComponentEntry{},
MissingRequiredAttributes: []inframonitoringtypes.MissingAttributesComponentEntry{},
}
for _, b := range spec.Buckets {
s := splitBucket(b, missingMetricsMap, missingAttrsMap)
if s.PresentDefault != nil {
resp.PresentDefaultEnabledMetrics = append(resp.PresentDefaultEnabledMetrics, *s.PresentDefault)
}
if s.PresentOptional != nil {
resp.PresentOptionalMetrics = append(resp.PresentOptionalMetrics, *s.PresentOptional)
}
if s.PresentAttrs != nil {
resp.PresentRequiredAttributes = append(resp.PresentRequiredAttributes, *s.PresentAttrs)
}
if s.MissingDefault != nil {
resp.MissingDefaultEnabledMetrics = append(resp.MissingDefaultEnabledMetrics, *s.MissingDefault)
}
if s.MissingOptional != nil {
resp.MissingOptionalMetrics = append(resp.MissingOptionalMetrics, *s.MissingOptional)
}
if s.MissingAttrs != nil {
resp.MissingRequiredAttributes = append(resp.MissingRequiredAttributes, *s.MissingAttrs)
}
}
resp.Ready = len(resp.MissingDefaultEnabledMetrics) == 0 &&
len(resp.MissingOptionalMetrics) == 0 &&
len(resp.MissingRequiredAttributes) == 0
return resp, nil
}
func (m *module) ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error) {
ctx = m.withInfraMonitoringContext(ctx, "ListHosts")

View File

@@ -1,114 +0,0 @@
package implinframonitoring
import (
"fmt"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
)
// splitBucket partitions one component bucket's metric and attribute lists
// against the module-wide missing sets into up to six response entries.
// Empty partitions are left nil so callers can skip them.
func splitBucket(b onboardingComponentBucket, missingMetrics, missingAttrs map[string]bool) bucketSplit {
var s bucketSplit
presentDef, missDef := partitionList(b.DefaultMetrics, missingMetrics)
if len(presentDef) > 0 {
s.PresentDefault = &inframonitoringtypes.MetricsComponentEntry{
Metrics: presentDef,
AssociatedComponent: b.Component,
}
}
if len(missDef) > 0 {
s.MissingDefault = &inframonitoringtypes.MissingMetricsComponentEntry{
MetricsComponentEntry: inframonitoringtypes.MetricsComponentEntry{
Metrics: missDef,
AssociatedComponent: b.Component,
},
Message: buildMissingDefaultMetricsMessage(missDef, b.Component.Name),
DocumentationLink: b.DocumentationLink,
}
}
presentOpt, missOpt := partitionList(b.OptionalMetrics, missingMetrics)
if len(presentOpt) > 0 {
s.PresentOptional = &inframonitoringtypes.MetricsComponentEntry{
Metrics: presentOpt,
AssociatedComponent: b.Component,
}
}
if len(missOpt) > 0 {
s.MissingOptional = &inframonitoringtypes.MissingMetricsComponentEntry{
MetricsComponentEntry: inframonitoringtypes.MetricsComponentEntry{
Metrics: missOpt,
AssociatedComponent: b.Component,
},
Message: buildMissingOptionalMetricsMessage(missOpt, b.Component.Name),
DocumentationLink: b.DocumentationLink,
}
}
presentA, missA := partitionList(b.RequiredAttrs, missingAttrs)
if len(presentA) > 0 {
s.PresentAttrs = &inframonitoringtypes.AttributesComponentEntry{
Attributes: presentA,
AssociatedComponent: b.Component,
}
}
if len(missA) > 0 {
s.MissingAttrs = &inframonitoringtypes.MissingAttributesComponentEntry{
AttributesComponentEntry: inframonitoringtypes.AttributesComponentEntry{
Attributes: missA,
AssociatedComponent: b.Component,
},
Message: buildMissingRequiredAttrsMessage(missA, b.Component.Name),
DocumentationLink: b.DocumentationLink,
}
}
return s
}
// getSpecForType returns the onboardingSpec for a given OnboardingType, or an error if the type is invalid.
func getSpecForType(t inframonitoringtypes.OnboardingType) (*onboardingSpec, error) {
spec, ok := onboardingSpecs[t]
if !ok {
return nil, errors.NewInvalidInputf(errors.CodeInvalidInput, "no onboarding spec for type: %s", t)
}
return &spec, nil
}
// partitionList splits items into those NOT in `missing` and those in `missing`.
// Preserves input order.
func partitionList(items []string, missing map[string]bool) (present, miss []string) {
for _, x := range items {
if missing[x] {
miss = append(miss, x)
} else {
present = append(present, x)
}
}
return present, miss
}
func buildMissingDefaultMetricsMessage(metrics []string, componentName string) string {
return fmt.Sprintf(
"Missing default metrics %s from %s. Learn how to configure here.",
strings.Join(metrics, ", "), componentName,
)
}
func buildMissingOptionalMetricsMessage(metrics []string, componentName string) string {
return fmt.Sprintf(
"Missing optional metrics %s from %s. Learn how to enable here.",
strings.Join(metrics, ", "), componentName,
)
}
func buildMissingRequiredAttrsMessage(attrs []string, componentName string) string {
return fmt.Sprintf(
"Missing required attributes %s from %s. Learn how to configure here.",
strings.Join(attrs, ", "), componentName,
)
}

View File

@@ -1,397 +0,0 @@
package implinframonitoring
import "github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
// Component names — the 5 OTel collector receivers/processors that produce
// metrics and resource attributes consumed by infra-monitoring tabs. Bare
// strings on purpose (not wrapped enums) — the list is open-ended enough that
// an enum adds more friction than value.
const (
componentNameHostMetricsReceiver = "hostmetricsreceiver"
componentNameKubeletStatsReceiver = "kubeletstatsreceiver"
componentNameK8sClusterReceiver = "k8sclusterreceiver"
componentNameResourceDetectionProcessor = "resourcedetectionprocessor"
componentNameK8sAttributesProcessor = "k8sattributesprocessor"
)
// Documentation links — one per component. User-facing; emitted on missing-entries.
const (
docLinkHostMetricsReceiver = "https://signoz.io/docs/infrastructure-monitoring/user-guides/hostmetrics/#configure-the-hostmetrics-receiver"
docLinkKubeletStatsReceiver = "https://signoz.io/docs/infrastructure-monitoring/user-guides/k8s-metrics/#setup-kubelet-stats-receiver"
docLinkK8sClusterReceiver = "https://signoz.io/docs/infrastructure-monitoring/user-guides/k8s-metrics/#setup-k8s-cluster-receiver"
docLinkResourceDetectionProcessor = "https://signoz.io/docs/infrastructure-monitoring/user-guides/hostmetrics/#configure-the-resourcedetection-processor"
docLinkK8sAttributesProcessor = "https://signoz.io/docs/infrastructure-monitoring/user-guides/k8s-metrics/#3-setup-k8sattributesprocessor-to-enable-kubernetes-metadata"
)
var (
componentHostMetricsReceiver = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeReceiver,
Name: componentNameHostMetricsReceiver,
}
componentKubeletStatsReceiver = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeReceiver,
Name: componentNameKubeletStatsReceiver,
}
componentK8sClusterReceiver = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeReceiver,
Name: componentNameK8sClusterReceiver,
}
componentResourceDetectionProcessor = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeProcessor,
Name: componentNameResourceDetectionProcessor,
}
componentK8sAttributesProcessor = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeProcessor,
Name: componentNameK8sAttributesProcessor,
}
)
// onboardingSpecs is the single lookup table the module consults for a type's
// readiness contract. Every OnboardingType value must have an entry here.
var onboardingSpecs = map[inframonitoringtypes.OnboardingType]onboardingSpec{
inframonitoringtypes.OnboardingTypeHosts: hostsSpec,
inframonitoringtypes.OnboardingTypeProcesses: processesSpec,
inframonitoringtypes.OnboardingTypePods: podsSpec,
inframonitoringtypes.OnboardingTypeNodes: nodesSpec,
inframonitoringtypes.OnboardingTypeDeployments: deploymentsSpec,
inframonitoringtypes.OnboardingTypeDaemonsets: daemonsetsSpec,
inframonitoringtypes.OnboardingTypeStatefulsets: statefulsetsSpec,
inframonitoringtypes.OnboardingTypeJobs: jobsSpec,
inframonitoringtypes.OnboardingTypeNamespaces: namespacesSpec,
inframonitoringtypes.OnboardingTypeClusters: clustersSpec,
inframonitoringtypes.OnboardingTypeVolumes: volumesSpec,
}
// Per-type specs. Every metric and attribute is spelled out in its own spec
// on purpose — no shared slices, no concatenation helpers. Repetition is
// cheaper than indirection when auditing what each tab actually requires.
var hostsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentHostMetricsReceiver,
DefaultMetrics: []string{
"system.cpu.time",
"system.memory.usage",
"system.cpu.load_average.15m",
"system.filesystem.usage",
},
DocumentationLink: docLinkHostMetricsReceiver,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"host.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var processesSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentHostMetricsReceiver,
DefaultMetrics: []string{
"process.cpu.time",
"process.memory.usage",
},
RequiredAttrs: []string{"process.pid"},
DocumentationLink: docLinkHostMetricsReceiver,
},
},
}
var podsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
OptionalMetrics: []string{
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{"k8s.pod.phase"},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.pod.uid"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
},
}
var nodesSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.node.cpu.usage",
"k8s.node.memory.working_set",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.node.allocatable_cpu",
"k8s.node.allocatable_memory", // k8s.node.allocatable_cpu and k8s.node.allocatable_memory are
// controlled by allocatable_types_to_report config option (Check // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/4f9a578b210a6dcb9f9bf47942f27208b5765298/receiver/k8sclusterreceiver/metadata.yaml#L805-L806)
"k8s.node.condition_ready", // # k8s.node.condition_* metrics (k8s.node.condition_ready, k8s.node.condition_memory_pressure, etc) are controlled# by node_conditions_to_report config option.
// By default, only k8s.node.condition_ready is enabled. (Check https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/4f9a578b210a6dcb9f9bf47942f27208b5765298/receiver/k8sclusterreceiver/metadata.yaml#L802)
"k8s.pod.phase", // pod counts per node by phase
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.node.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
},
}
var deploymentsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
OptionalMetrics: []string{
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.pod.phase",
"k8s.deployment.desired",
"k8s.deployment.available",
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.deployment.name", "k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var daemonsetsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
OptionalMetrics: []string{
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.pod.phase",
"k8s.daemonset.desired_scheduled_nodes",
"k8s.daemonset.current_scheduled_nodes",
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.daemonset.name", "k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var statefulsetsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
OptionalMetrics: []string{
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.pod.phase",
"k8s.statefulset.desired_pods",
"k8s.statefulset.current_pods",
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.statefulset.name", "k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var jobsSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
OptionalMetrics: []string{
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.pod.phase",
"k8s.job.desired_successful_pods",
"k8s.job.active_pods",
"k8s.job.failed_pods",
"k8s.job.successful_pods",
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.job.name", "k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var namespacesSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.pod.cpu.usage",
"k8s.pod.memory.working_set",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{"k8s.pod.phase"},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var clustersSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.node.cpu.usage",
"k8s.node.memory.working_set",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sClusterReceiver,
DefaultMetrics: []string{
"k8s.node.allocatable_cpu",
"k8s.node.allocatable_memory", //k8s.node.allocatable_cpu and k8s.node.allocatable_memory are
// controlled by allocatable_types_to_report config option (Check // https://github.com/open-telemetry/opentelemetry-collector-contrib/blob/4f9a578b210a6dcb9f9bf47942f27208b5765298/receiver/k8sclusterreceiver/metadata.yaml#L805-L806)
"k8s.node.condition_ready", // node counts by readiness
"k8s.pod.phase", // pod counts per cluster by phase
},
DocumentationLink: docLinkK8sClusterReceiver,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}
var volumesSpec = onboardingSpec{
Buckets: []onboardingComponentBucket{
{
Component: componentKubeletStatsReceiver,
DefaultMetrics: []string{
"k8s.volume.available",
"k8s.volume.capacity",
"k8s.volume.inodes",
"k8s.volume.inodes.free",
"k8s.volume.inodes.used",
},
DocumentationLink: docLinkKubeletStatsReceiver,
},
{
Component: componentK8sAttributesProcessor,
RequiredAttrs: []string{"k8s.persistentvolumeclaim.name", "k8s.namespace.name"},
DocumentationLink: docLinkK8sAttributesProcessor,
},
{
Component: componentResourceDetectionProcessor,
RequiredAttrs: []string{"k8s.cluster.name"},
DocumentationLink: docLinkResourceDetectionProcessor,
},
},
}

View File

@@ -1,246 +0,0 @@
package implinframonitoring
import (
"testing"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/stretchr/testify/require"
)
// Component used across splitBucket cases — it's a processor so the test
// doesn't carry any receiver semantics.
var testComponent = inframonitoringtypes.AssociatedComponent{
Type: inframonitoringtypes.OnboardingComponentTypeReceiver,
Name: "testreceiver",
}
const testDocLink = "https://example.com/docs"
func TestSplitBucket(t *testing.T) {
type want struct {
presentDefault []string
presentOptional []string
presentAttrs []string
missingDefault []string
missingOptional []string
missingAttrs []string
}
tests := []struct {
name string
bucket onboardingComponentBucket
missingMetrics map[string]bool
missingAttrs map[string]bool
want want
}{
{
name: "empty bucket — nothing to emit",
bucket: onboardingComponentBucket{Component: testComponent, DocumentationLink: testDocLink},
missingMetrics: map[string]bool{},
missingAttrs: map[string]bool{},
want: want{},
},
{
name: "all default metrics present",
bucket: onboardingComponentBucket{
Component: testComponent,
DefaultMetrics: []string{"m1", "m2"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{},
missingAttrs: map[string]bool{},
want: want{
presentDefault: []string{"m1", "m2"},
},
},
{
name: "all default metrics missing",
bucket: onboardingComponentBucket{
Component: testComponent,
DefaultMetrics: []string{"m1", "m2"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{"m1": true, "m2": true},
missingAttrs: map[string]bool{},
want: want{
missingDefault: []string{"m1", "m2"},
},
},
{
name: "mixed default metrics",
bucket: onboardingComponentBucket{
Component: testComponent,
DefaultMetrics: []string{"m1", "m2", "m3"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{"m2": true},
missingAttrs: map[string]bool{},
want: want{
presentDefault: []string{"m1", "m3"},
missingDefault: []string{"m2"},
},
},
{
name: "only optional metrics — all missing",
bucket: onboardingComponentBucket{
Component: testComponent,
OptionalMetrics: []string{"opt1", "opt2"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{"opt1": true, "opt2": true},
missingAttrs: map[string]bool{},
want: want{
missingOptional: []string{"opt1", "opt2"},
},
},
{
name: "only required attrs — all present",
bucket: onboardingComponentBucket{
Component: testComponent,
RequiredAttrs: []string{"a1", "a2"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{},
missingAttrs: map[string]bool{},
want: want{
presentAttrs: []string{"a1", "a2"},
},
},
{
name: "only required attrs — all missing",
bucket: onboardingComponentBucket{
Component: testComponent,
RequiredAttrs: []string{"a1"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{},
missingAttrs: map[string]bool{"a1": true},
want: want{
missingAttrs: []string{"a1"},
},
},
{
name: "every dimension populated on both sides",
bucket: onboardingComponentBucket{
Component: testComponent,
DefaultMetrics: []string{"d1", "d2"},
OptionalMetrics: []string{"o1", "o2"},
RequiredAttrs: []string{"a1", "a2"},
DocumentationLink: testDocLink,
},
missingMetrics: map[string]bool{"d2": true, "o1": true},
missingAttrs: map[string]bool{"a2": true},
want: want{
presentDefault: []string{"d1"},
missingDefault: []string{"d2"},
presentOptional: []string{"o2"},
missingOptional: []string{"o1"},
presentAttrs: []string{"a1"},
missingAttrs: []string{"a2"},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := splitBucket(tt.bucket, tt.missingMetrics, tt.missingAttrs)
requireMetricsEntry(t, "presentDefault", got.PresentDefault, tt.want.presentDefault)
requireMetricsEntry(t, "presentOptional", got.PresentOptional, tt.want.presentOptional)
requireAttrsEntry(t, "presentAttrs", got.PresentAttrs, tt.want.presentAttrs)
requireMissingMetrics(t, "missingDefault", got.MissingDefault, tt.want.missingDefault)
requireMissingMetrics(t, "missingOptional", got.MissingOptional, tt.want.missingOptional)
requireMissingAttrs(t, "missingAttrs", got.MissingAttrs, tt.want.missingAttrs)
})
}
}
func TestPartitionList(t *testing.T) {
present, missing := partitionList(
[]string{"a", "b", "c", "d"},
map[string]bool{"b": true, "d": true},
)
require.Equal(t, []string{"a", "c"}, present)
require.Equal(t, []string{"b", "d"}, missing)
}
func TestMissingMessageTemplates(t *testing.T) {
require.Equal(t,
"Missing default metrics m1, m2 from comp. Learn how to configure here.",
buildMissingDefaultMetricsMessage([]string{"m1", "m2"}, "comp"),
)
require.Equal(t,
"Missing optional metrics m1 from comp. Learn how to enable here.",
buildMissingOptionalMetricsMessage([]string{"m1"}, "comp"),
)
require.Equal(t,
"Missing required attributes a1 from comp. Learn how to configure here.",
buildMissingRequiredAttrsMessage([]string{"a1"}, "comp"),
)
require.Equal(t,
"Missing required attributes a1, a2 from comp. Learn how to configure here.",
buildMissingRequiredAttrsMessage([]string{"a1", "a2"}, "comp"),
)
}
// TestOnboardingSpecs_CoverAllTypes ensures the spec map has an entry for
// every OnboardingType — prevents silently shipping an onboarding type that
// has no spec and would 500 at runtime.
func TestOnboardingSpecs_CoverAllTypes(t *testing.T) {
for _, tp := range inframonitoringtypes.ValidOnboardingTypes {
_, ok := onboardingSpecs[tp]
require.True(t, ok, "missing onboarding spec for type %s", tp)
}
require.Len(t, onboardingSpecs, len(inframonitoringtypes.ValidOnboardingTypes))
}
// --- helpers ---
func requireMetricsEntry(t *testing.T, name string, got *inframonitoringtypes.MetricsComponentEntry, wantMetrics []string) {
t.Helper()
if len(wantMetrics) == 0 {
require.Nil(t, got, name)
return
}
require.NotNil(t, got, name)
require.Equal(t, wantMetrics, got.Metrics, name)
require.Equal(t, testComponent, got.AssociatedComponent, name)
}
func requireAttrsEntry(t *testing.T, name string, got *inframonitoringtypes.AttributesComponentEntry, wantAttrs []string) {
t.Helper()
if len(wantAttrs) == 0 {
require.Nil(t, got, name)
return
}
require.NotNil(t, got, name)
require.Equal(t, wantAttrs, got.Attributes, name)
require.Equal(t, testComponent, got.AssociatedComponent, name)
}
func requireMissingMetrics(t *testing.T, name string, got *inframonitoringtypes.MissingMetricsComponentEntry, wantMetrics []string) {
t.Helper()
if len(wantMetrics) == 0 {
require.Nil(t, got, name)
return
}
require.NotNil(t, got, name)
require.Equal(t, wantMetrics, got.Metrics, name)
require.Equal(t, testComponent, got.AssociatedComponent, name)
require.NotEmpty(t, got.Message, name)
require.Equal(t, testDocLink, got.DocumentationLink, name)
}
func requireMissingAttrs(t *testing.T, name string, got *inframonitoringtypes.MissingAttributesComponentEntry, wantAttrs []string) {
t.Helper()
if len(wantAttrs) == 0 {
require.Nil(t, got, name)
return
}
require.NotNil(t, got, name)
require.Equal(t, wantAttrs, got.Attributes, name)
require.Equal(t, testComponent, got.AssociatedComponent, name)
require.NotEmpty(t, got.Message, name)
require.Equal(t, testDocLink, got.DocumentationLink, name)
}

View File

@@ -20,7 +20,6 @@ type Handler interface {
ListStatefulSets(http.ResponseWriter, *http.Request)
ListJobs(http.ResponseWriter, *http.Request)
ListDaemonSets(http.ResponseWriter, *http.Request)
GetOnboarding(http.ResponseWriter, *http.Request)
}
type Module interface {
@@ -35,5 +34,4 @@ type Module interface {
ListStatefulSets(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableStatefulSets) (*inframonitoringtypes.StatefulSets, error)
ListJobs(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableJobs) (*inframonitoringtypes.Jobs, error)
ListDaemonSets(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableDaemonSets) (*inframonitoringtypes.DaemonSets, error)
GetOnboarding(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableOnboarding) (*inframonitoringtypes.Onboarding, error)
}

View File

@@ -31,6 +31,8 @@ type builderQuery[T any] struct {
fromMS uint64
toMS uint64
kind qbtypes.RequestType
logTraceIDWindowPaddingMS uint64
}
var _ qbtypes.Query = (*builderQuery[any])(nil)
@@ -43,16 +45,18 @@ func newBuilderQuery[T any](
tr qbtypes.TimeRange,
kind qbtypes.RequestType,
variables map[string]qbtypes.VariableItem,
logTraceIDWindowPaddingMS uint64,
) *builderQuery[T] {
return &builderQuery[T]{
logger: logger,
telemetryStore: telemetryStore,
stmtBuilder: stmtBuilder,
spec: spec,
variables: variables,
fromMS: tr.From,
toMS: tr.To,
kind: kind,
logger: logger,
telemetryStore: telemetryStore,
stmtBuilder: stmtBuilder,
spec: spec,
variables: variables,
fromMS: tr.From,
toMS: tr.To,
kind: kind,
logTraceIDWindowPaddingMS: logTraceIDWindowPaddingMS,
}
}
@@ -277,9 +281,20 @@ func (q *builderQuery[T]) narrowWindowByTraceID(ctx context.Context, fromMS, toM
return fromMS, toMS, true, ""
}
// Logs can be flushed slightly after the span ends. The trace
// time range comes from the spans table, so for logs we widen it by the
// configured padding before clamping. Keep the actual recorded bounds for
// the user-facing warning so it reports where the trace truly lies, not the
// padded range.
actualStartMS, actualEndMS := traceStartMS, traceEndMS
if q.spec.Signal == telemetrytypes.SignalLogs {
traceStartMS -= q.logTraceIDWindowPaddingMS
traceEndMS += q.logTraceIDWindowPaddingMS
}
if traceStartMS > toMS || traceEndMS < fromMS {
traceStartUTC := time.UnixMilli(int64(traceStartMS)).UTC().Format(time.RFC3339)
traceEndUTC := time.UnixMilli(int64(traceEndMS)).UTC().Format(time.RFC3339)
traceStartUTC := time.UnixMilli(int64(actualStartMS)).UTC().Format(time.RFC3339)
traceEndUTC := time.UnixMilli(int64(actualEndMS)).UTC().Format(time.RFC3339)
return fromMS, toMS, false, fmt.Sprintf(traceOutsideRangeWarn, q.spec.Name, traceStartUTC, traceEndUTC)
}
if traceStartMS > fromMS {

View File

@@ -23,6 +23,8 @@ type Config struct {
MaxConcurrentQueries int `yaml:"max_concurrent_queries" mapstructure:"max_concurrent_queries"`
// SkipResourceFingerprint configures when the resource fingerprint subquery is skipped in favor of main-table filtering.
SkipResourceFingerprint SkipResourceFingerprint `yaml:"skip_resource_fingerprint" mapstructure:"skip_resource_fingerprint"`
// LogTraceIDWindowPadding is the padding added to narrowed down timerange from trace summary to logs with trace_id filter.
LogTraceIDWindowPadding time.Duration `yaml:"log_trace_id_window_padding" mapstructure:"log_trace_id_window_padding"`
}
// NewConfigFactory creates a new config factory for querier.
@@ -40,6 +42,7 @@ func newConfig() factory.Config {
Enabled: false,
Threshold: 100000,
},
LogTraceIDWindowPadding: 5 * time.Minute,
}
}
@@ -57,6 +60,9 @@ func (c Config) Validate() error {
if c.SkipResourceFingerprint.Enabled && c.SkipResourceFingerprint.Threshold == 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "skip_resource_fingerprint.threshold must be > 0 when enabled")
}
if c.LogTraceIDWindowPadding < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "log_trace_id_window_padding must not be negative, got %v", c.LogTraceIDWindowPadding)
}
return nil
}

View File

@@ -35,19 +35,20 @@ var (
)
type querier struct {
logger *slog.Logger
fl flagger.Flagger
telemetryStore telemetrystore.TelemetryStore
metadataStore telemetrytypes.MetadataStore
promEngine prometheus.Prometheus
traceStmtBuilder qbtypes.StatementBuilder[qbtypes.TraceAggregation]
logStmtBuilder qbtypes.StatementBuilder[qbtypes.LogAggregation]
auditStmtBuilder qbtypes.StatementBuilder[qbtypes.LogAggregation]
metricStmtBuilder qbtypes.StatementBuilder[qbtypes.MetricAggregation]
meterStmtBuilder qbtypes.StatementBuilder[qbtypes.MetricAggregation]
traceOperatorStmtBuilder qbtypes.TraceOperatorStatementBuilder
bucketCache BucketCache
liveDataRefresh time.Duration
logger *slog.Logger
fl flagger.Flagger
telemetryStore telemetrystore.TelemetryStore
metadataStore telemetrytypes.MetadataStore
promEngine prometheus.Prometheus
traceStmtBuilder qbtypes.StatementBuilder[qbtypes.TraceAggregation]
logStmtBuilder qbtypes.StatementBuilder[qbtypes.LogAggregation]
auditStmtBuilder qbtypes.StatementBuilder[qbtypes.LogAggregation]
metricStmtBuilder qbtypes.StatementBuilder[qbtypes.MetricAggregation]
meterStmtBuilder qbtypes.StatementBuilder[qbtypes.MetricAggregation]
traceOperatorStmtBuilder qbtypes.TraceOperatorStatementBuilder
bucketCache BucketCache
liveDataRefresh time.Duration
logTraceIDWindowPaddingMS uint64
}
var _ Querier = (*querier)(nil)
@@ -65,22 +66,24 @@ func New(
traceOperatorStmtBuilder qbtypes.TraceOperatorStatementBuilder,
bucketCache BucketCache,
flagger flagger.Flagger,
logTraceIDWindowPadding time.Duration,
) *querier {
querierSettings := factory.NewScopedProviderSettings(settings, "github.com/SigNoz/signoz/pkg/querier")
return &querier{
logger: querierSettings.Logger(),
fl: flagger,
telemetryStore: telemetryStore,
metadataStore: metadataStore,
promEngine: promEngine,
traceStmtBuilder: traceStmtBuilder,
logStmtBuilder: logStmtBuilder,
auditStmtBuilder: auditStmtBuilder,
metricStmtBuilder: metricStmtBuilder,
meterStmtBuilder: meterStmtBuilder,
traceOperatorStmtBuilder: traceOperatorStmtBuilder,
bucketCache: bucketCache,
liveDataRefresh: 5 * time.Second,
logger: querierSettings.Logger(),
fl: flagger,
telemetryStore: telemetryStore,
metadataStore: metadataStore,
promEngine: promEngine,
traceStmtBuilder: traceStmtBuilder,
logStmtBuilder: logStmtBuilder,
auditStmtBuilder: auditStmtBuilder,
metricStmtBuilder: metricStmtBuilder,
meterStmtBuilder: meterStmtBuilder,
traceOperatorStmtBuilder: traceOperatorStmtBuilder,
bucketCache: bucketCache,
liveDataRefresh: 5 * time.Second,
logTraceIDWindowPaddingMS: uint64(logTraceIDWindowPadding.Milliseconds()),
}
}
@@ -173,7 +176,7 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
case qbtypes.QueryBuilderQuery[qbtypes.TraceAggregation]:
spec.ShiftBy = extractShiftFromBuilderQuery(spec)
timeRange := adjustTimeRangeForShift(spec, qbtypes.TimeRange{From: req.Start, To: req.End}, req.RequestType)
bq := newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq := newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, spec, timeRange, req.RequestType, tmplVars, 0)
queries[spec.Name] = bq
steps[spec.Name] = spec.StepInterval
case qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]:
@@ -183,7 +186,7 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
if spec.Source == telemetrytypes.SourceAudit {
stmtBuilder = q.auditStmtBuilder
}
bq := newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq := newBuilderQuery(q.logger, q.telemetryStore, stmtBuilder, spec, timeRange, req.RequestType, tmplVars, q.logTraceIDWindowPaddingMS)
queries[spec.Name] = bq
steps[spec.Name] = spec.StepInterval
case qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]:
@@ -200,9 +203,9 @@ func (q *querier) QueryRange(ctx context.Context, orgID valuer.UUID, req *qbtype
if spec.Source == telemetrytypes.SourceMeter {
event.Source = telemetrytypes.SourceMeter.StringValue()
bq = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq = newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, spec, timeRange, req.RequestType, tmplVars, 0)
} else {
bq = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars)
bq = newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, spec, timeRange, req.RequestType, tmplVars, 0)
}
queries[spec.Name] = bq
@@ -508,7 +511,7 @@ func (q *querier) QueryRawStream(ctx context.Context, orgID valuer.UUID, req *qb
"id": {
Value: updatedLogID,
},
})
}, q.logTraceIDWindowPaddingMS)
queries[spec.Name] = bq
qbResp, qbErr := q.run(ctx, orgID, queries, req, nil, event, nil)
@@ -804,7 +807,7 @@ func (q *querier) createRangedQuery(originalQuery qbtypes.Query, timeRange qbtyp
specCopy := qt.spec.Copy()
specCopy.ShiftBy = extractShiftFromBuilderQuery(specCopy)
adjustedTimeRange := adjustTimeRangeForShift(specCopy, timeRange, qt.kind)
return newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, q.traceStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, 0)
case *builderQuery[qbtypes.LogAggregation]:
specCopy := qt.spec.Copy()
@@ -814,16 +817,16 @@ func (q *querier) createRangedQuery(originalQuery qbtypes.Query, timeRange qbtyp
if qt.spec.Source == telemetrytypes.SourceAudit {
shiftStmtBuilder = q.auditStmtBuilder
}
return newBuilderQuery(q.logger, q.telemetryStore, shiftStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, shiftStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, q.logTraceIDWindowPaddingMS)
case *builderQuery[qbtypes.MetricAggregation]:
specCopy := qt.spec.Copy()
specCopy.ShiftBy = extractShiftFromBuilderQuery(specCopy)
adjustedTimeRange := adjustTimeRangeForShift(specCopy, timeRange, qt.kind)
if qt.spec.Source == telemetrytypes.SourceMeter {
return newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, q.meterStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, 0)
}
return newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables)
return newBuilderQuery(q.logger, q.telemetryStore, q.metricStmtBuilder, specCopy, adjustedTimeRange, qt.kind, qt.variables, 0)
case *traceOperatorQuery:
specCopy := qt.spec.Copy()
return &traceOperatorQuery{

View File

@@ -54,6 +54,7 @@ func TestQueryRange_MetricTypeMissing(t *testing.T) {
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t), // flagger
0,
)
req := &qbtypes.QueryRangeRequest{
@@ -124,6 +125,7 @@ func TestQueryRange_MetricTypeFromStore(t *testing.T) {
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flaggertest.New(t), // flagger
0,
)
req := &qbtypes.QueryRangeRequest{

View File

@@ -192,5 +192,6 @@ func newProvider(
traceOperatorStmtBuilder,
bucketCache,
flagger,
cfg.LogTraceIDWindowPadding,
), nil
}

View File

@@ -3,6 +3,7 @@ package rules
import (
"context"
"testing"
"time"
"github.com/SigNoz/signoz/pkg/flagger"
"github.com/SigNoz/signoz/pkg/instrumentation/instrumentationtest"
@@ -54,6 +55,7 @@ func prepareQuerierForMetrics(t *testing.T, telemetryStore telemetrystore.Teleme
nil, // traceOperatorStmtBuilder
nil, // bucketCache
flagger,
0,
), metadataStore
}
@@ -107,6 +109,7 @@ func prepareQuerierForLogs(t *testing.T, telemetryStore telemetrystore.Telemetry
nil, // traceOperatorStmtBuilder
nil, // bucketCache
fl,
5*time.Minute, // logTraceIDWindowPadding
)
}
@@ -154,5 +157,6 @@ func prepareQuerierForTraces(t *testing.T, telemetryStore telemetrystore.Telemet
nil, // traceOperatorStmtBuilder
nil, // bucketCache
fl,
0,
)
}

View File

@@ -1,83 +0,0 @@
package inframonitoringtypes
import (
"slices"
"github.com/SigNoz/signoz/pkg/errors"
)
// PostableOnboarding is the request for GET /api/v2/infra_monitoring/onboarding.
// The single `type` query param selects which infra-monitoring subsection the
// readiness check runs for.
type PostableOnboarding struct {
Type OnboardingType `query:"type" required:"true"`
}
// Validate rejects empty/unknown onboarding types.
func (req *PostableOnboarding) Validate() error {
if req == nil {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
if req.Type.IsZero() {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "type is required")
}
if !slices.Contains(ValidOnboardingTypes, req.Type) {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid type: %s", req.Type)
}
return nil
}
// Onboarding is the response for GET /api/v2/infra_monitoring/onboarding.
//
// The three present/missing pairs partition a type's requirements into three
// dimensions — default-enabled metrics, optional metrics, required attributes —
// each bucketed by the collector component (receiver or processor) that
// produces it. Ready is true iff every Missing* array is empty.
type Onboarding struct {
Type OnboardingType `json:"type" required:"true"`
Ready bool `json:"ready" required:"true"`
PresentDefaultEnabledMetrics []MetricsComponentEntry `json:"presentDefaultEnabledMetrics" required:"true"`
PresentOptionalMetrics []MetricsComponentEntry `json:"presentOptionalMetrics" required:"true"`
PresentRequiredAttributes []AttributesComponentEntry `json:"presentRequiredAttributes" required:"true"`
MissingDefaultEnabledMetrics []MissingMetricsComponentEntry `json:"missingDefaultEnabledMetrics" required:"true"`
MissingOptionalMetrics []MissingMetricsComponentEntry `json:"missingOptionalMetrics" required:"true"`
MissingRequiredAttributes []MissingAttributesComponentEntry `json:"missingRequiredAttributes" required:"true"`
}
// AssociatedComponent identifies the collector receiver or processor that a
// metric or attribute originates from. Name is free-form (e.g. "kubeletstatsreceiver").
type AssociatedComponent struct {
Type OnboardingComponentType `json:"type" required:"true"`
Name string `json:"name" required:"true"`
}
// MetricsComponentEntry lists metrics that share a single associated component.
type MetricsComponentEntry struct {
Metrics []string `json:"metrics" required:"true"`
AssociatedComponent AssociatedComponent `json:"associatedComponent" required:"true"`
}
// AttributesComponentEntry lists resource attributes that share a single associated component.
type AttributesComponentEntry struct {
Attributes []string `json:"attributes" required:"true"`
AssociatedComponent AssociatedComponent `json:"associatedComponent" required:"true"`
}
// MissingMetricsComponentEntry extends MetricsComponentEntry with a user-facing
// message and a docs link for fixing the missing metrics.
type MissingMetricsComponentEntry struct {
MetricsComponentEntry
Message string `json:"message" required:"true"`
DocumentationLink string `json:"documentationLink" required:"true"`
}
// MissingAttributesComponentEntry extends AttributesComponentEntry with a user-facing
// message and a docs link for fixing the missing attributes.
type MissingAttributesComponentEntry struct {
AttributesComponentEntry
Message string `json:"message" required:"true"`
DocumentationLink string `json:"documentationLink" required:"true"`
}

View File

@@ -1,71 +0,0 @@
package inframonitoringtypes
import "github.com/SigNoz/signoz/pkg/valuer"
// OnboardingType identifies a single infra-monitoring subsection (UI tab).
// One value per v1/v2 list API we surface in the infra-monitoring section.
type OnboardingType struct {
valuer.String
}
var (
OnboardingTypeHosts = OnboardingType{valuer.NewString("hosts")}
OnboardingTypeProcesses = OnboardingType{valuer.NewString("processes")}
OnboardingTypePods = OnboardingType{valuer.NewString("pods")}
OnboardingTypeNodes = OnboardingType{valuer.NewString("nodes")}
OnboardingTypeDeployments = OnboardingType{valuer.NewString("deployments")}
OnboardingTypeDaemonsets = OnboardingType{valuer.NewString("daemonsets")}
OnboardingTypeStatefulsets = OnboardingType{valuer.NewString("statefulsets")}
OnboardingTypeJobs = OnboardingType{valuer.NewString("jobs")}
OnboardingTypeNamespaces = OnboardingType{valuer.NewString("namespaces")}
OnboardingTypeClusters = OnboardingType{valuer.NewString("clusters")}
OnboardingTypeVolumes = OnboardingType{valuer.NewString("volumes")}
)
func (OnboardingType) Enum() []any {
return []any{
OnboardingTypeHosts,
OnboardingTypeProcesses,
OnboardingTypePods,
OnboardingTypeNodes,
OnboardingTypeDeployments,
OnboardingTypeDaemonsets,
OnboardingTypeStatefulsets,
OnboardingTypeJobs,
OnboardingTypeNamespaces,
OnboardingTypeClusters,
OnboardingTypeVolumes,
}
}
var ValidOnboardingTypes = []OnboardingType{
OnboardingTypeHosts,
OnboardingTypeProcesses,
OnboardingTypePods,
OnboardingTypeNodes,
OnboardingTypeDeployments,
OnboardingTypeDaemonsets,
OnboardingTypeStatefulsets,
OnboardingTypeJobs,
OnboardingTypeNamespaces,
OnboardingTypeClusters,
OnboardingTypeVolumes,
}
// OnboardingComponentType tags each AssociatedComponent as either a receiver or a processor.
// Only these two values are ever written by the module.
type OnboardingComponentType struct {
valuer.String
}
var (
OnboardingComponentTypeReceiver = OnboardingComponentType{valuer.NewString("receiver")}
OnboardingComponentTypeProcessor = OnboardingComponentType{valuer.NewString("processor")}
)
func (OnboardingComponentType) Enum() []any {
return []any{
OnboardingComponentTypeReceiver,
OnboardingComponentTypeProcessor,
}
}

View File

@@ -1,110 +0,0 @@
package inframonitoringtypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/require"
)
func TestPostableOnboarding_Validate(t *testing.T) {
tests := []struct {
name string
req *PostableOnboarding
wantErr bool
}{
{
name: "nil request",
req: nil,
wantErr: true,
},
{
name: "empty type",
req: &PostableOnboarding{},
wantErr: true,
},
{
name: "unknown type",
req: &PostableOnboarding{Type: OnboardingType{valuer.NewString("foo")}},
wantErr: true,
},
{
name: "hosts",
req: &PostableOnboarding{Type: OnboardingTypeHosts},
wantErr: false,
},
{
name: "processes",
req: &PostableOnboarding{Type: OnboardingTypeProcesses},
wantErr: false,
},
{
name: "pods",
req: &PostableOnboarding{Type: OnboardingTypePods},
wantErr: false,
},
{
name: "nodes",
req: &PostableOnboarding{Type: OnboardingTypeNodes},
wantErr: false,
},
{
name: "deployments",
req: &PostableOnboarding{Type: OnboardingTypeDeployments},
wantErr: false,
},
{
name: "daemonsets",
req: &PostableOnboarding{Type: OnboardingTypeDaemonsets},
wantErr: false,
},
{
name: "statefulsets",
req: &PostableOnboarding{Type: OnboardingTypeStatefulsets},
wantErr: false,
},
{
name: "jobs",
req: &PostableOnboarding{Type: OnboardingTypeJobs},
wantErr: false,
},
{
name: "namespaces",
req: &PostableOnboarding{Type: OnboardingTypeNamespaces},
wantErr: false,
},
{
name: "clusters",
req: &PostableOnboarding{Type: OnboardingTypeClusters},
wantErr: false,
},
{
name: "volumes",
req: &PostableOnboarding{Type: OnboardingTypeVolumes},
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if tt.wantErr {
require.Error(t, err)
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
} else {
require.NoError(t, err)
}
})
}
}
// TestValidOnboardingTypes_MatchesEnum ensures the ValidOnboardingTypes slice
// stays in sync with the Enum() list — both must cover every OnboardingType value.
func TestValidOnboardingTypes_MatchesEnum(t *testing.T) {
enum := OnboardingType{}.Enum()
require.Equal(t, len(enum), len(ValidOnboardingTypes))
for i, v := range enum {
require.Equal(t, v, ValidOnboardingTypes[i])
}
}

View File

@@ -1,327 +0,0 @@
"""Integration tests for the v2 infra-monitoring onboarding endpoint.
GET /api/v2/infra_monitoring/onboarding?type=<t> reports per-tab readiness:
for each collector component it lists which required/optional metrics and
required attributes are present vs missing. `ready` is true iff every missing
list is empty (optional gaps DO block).
Presence is checked against distributed_metadata with NO time window
(pkg/modules/inframonitoring/implinframonitoring/helpers.go:423,:479): a metric
is present iff it was ever ingested; an attribute is present iff it appears as a
label on any of that type's spec metrics. So seeding here is purely "make these
(metric, label) rows exist" — no start/end, no value math. insert_metrics is
function-scoped and truncates metadata on teardown, so (serial suite) each test
sees only its own seeds.
SPECS mirrors pkg/modules/inframonitoring/implinframonitoring/onboarding_constants.go
and is the contract lock: if a Go spec changes, the matching assertion fails.
"""
from datetime import UTC, datetime
from http import HTTPStatus
import pytest
import requests
from fixtures import types
from fixtures.auth import USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD
from fixtures.metrics import Metrics
ENDPOINT = "/api/v2/infra_monitoring/onboarding"
# Component names (onboarding_constants.go:9-15) + their type + docs link.
HMR = "hostmetricsreceiver"
KSR = "kubeletstatsreceiver"
KCR = "k8sclusterreceiver"
RDP = "resourcedetectionprocessor"
KAP = "k8sattributesprocessor"
COMPONENT_TYPE = {HMR: "receiver", KSR: "receiver", KCR: "receiver", RDP: "processor", KAP: "processor"}
_PODS_OPT = [
"k8s.pod.cpu_request_utilization",
"k8s.pod.cpu_limit_utilization",
"k8s.pod.memory_request_utilization",
"k8s.pod.memory_limit_utilization",
]
# Mirror of onboardingSpecs: type -> {default|optional: {component: [metrics]}, attrs: {component: [attrs]}}.
SPECS = {
"hosts": {
"default": {HMR: ["system.cpu.time", "system.memory.usage", "system.cpu.load_average.15m", "system.filesystem.usage"]},
"optional": {},
"attrs": {RDP: ["host.name"]},
},
"processes": {
"default": {HMR: ["process.cpu.time", "process.memory.usage"]},
"optional": {},
"attrs": {HMR: ["process.pid"]},
},
"pods": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase"]},
"optional": {KSR: list(_PODS_OPT)},
"attrs": {KAP: ["k8s.pod.uid"]},
},
"nodes": {
"default": {
KSR: ["k8s.node.cpu.usage", "k8s.node.memory.working_set"],
KCR: ["k8s.node.allocatable_cpu", "k8s.node.allocatable_memory", "k8s.node.condition_ready", "k8s.pod.phase"],
},
"optional": {},
"attrs": {KAP: ["k8s.node.name"]},
},
"deployments": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase", "k8s.deployment.desired", "k8s.deployment.available"]},
"optional": {KSR: list(_PODS_OPT)},
"attrs": {KAP: ["k8s.deployment.name", "k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
"daemonsets": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase", "k8s.daemonset.desired_scheduled_nodes", "k8s.daemonset.current_scheduled_nodes"]},
"optional": {KSR: list(_PODS_OPT)},
"attrs": {KAP: ["k8s.daemonset.name", "k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
"statefulsets": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase", "k8s.statefulset.desired_pods", "k8s.statefulset.current_pods"]},
"optional": {KSR: list(_PODS_OPT)},
"attrs": {KAP: ["k8s.statefulset.name", "k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
"jobs": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase", "k8s.job.desired_successful_pods", "k8s.job.active_pods", "k8s.job.failed_pods", "k8s.job.successful_pods"]},
"optional": {KSR: list(_PODS_OPT)},
"attrs": {KAP: ["k8s.job.name", "k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
"namespaces": {
"default": {KSR: ["k8s.pod.cpu.usage", "k8s.pod.memory.working_set"], KCR: ["k8s.pod.phase"]},
"optional": {},
"attrs": {KAP: ["k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
"clusters": {
"default": {KSR: ["k8s.node.cpu.usage", "k8s.node.memory.working_set"], KCR: ["k8s.node.allocatable_cpu", "k8s.node.allocatable_memory", "k8s.node.condition_ready", "k8s.pod.phase"]},
"optional": {},
"attrs": {RDP: ["k8s.cluster.name"]},
},
"volumes": {
"default": {KSR: ["k8s.volume.available", "k8s.volume.capacity", "k8s.volume.inodes", "k8s.volume.inodes.free", "k8s.volume.inodes.used"]},
"optional": {},
"attrs": {KAP: ["k8s.persistentvolumeclaim.name", "k8s.namespace.name"], RDP: ["k8s.cluster.name"]},
},
}
ALL_TYPES = list(SPECS.keys())
# --- helpers ---
def _all(d: dict) -> list:
"""Flatten a {component: [items]} map to a flat list."""
return [x for items in d.values() for x in items]
def _all_metrics(t: str) -> list:
return _all(SPECS[t]["default"]) + _all(SPECS[t]["optional"])
def _attr_labels(t: str, drop: str | None = None) -> dict:
"""Labels carrying every required attr (so they resolve present), minus `drop`."""
return {a: f"v-{a}" for a in _all(SPECS[t]["attrs"]) if a != drop}
# Marker label so every seeded metric registers in distributed_metadata even when
# `labels` is empty (insert_metrics writes a metadata row per label). Non-spec, so it
# is never counted as a present required attribute.
_SEED_MARKER = {"test.seed.marker": "1"}
def _seed(insert_metrics, metric_names: list, labels: dict) -> None:
now = datetime.now(tz=UTC).replace(microsecond=0)
insert_metrics([Metrics(metric_name=m, labels={**_SEED_MARKER, **labels}, timestamp=now, value=1.0) for m in metric_names])
def _request(signoz: types.SigNoz, token: str, type_: str | None):
params = {} if type_ is None else {"type": type_}
return requests.get(
signoz.self.host_configs["8080"].get(ENDPOINT),
headers={"authorization": f"Bearer {token}"},
params=params,
timeout=5,
)
def _grouped(entries: list, field: str) -> dict:
"""{component_name: set(items)} from a present/missing entry list; also asserts
each entry's associatedComponent.type matches the known component type."""
out: dict = {}
for e in entries:
comp = e["associatedComponent"]
assert comp["type"] == COMPONENT_TYPE[comp["name"]], f"wrong type for {comp!r}"
out.setdefault(comp["name"], set()).update(e[field])
return out
def _exp(d: dict) -> dict:
return {comp: set(items) for comp, items in d.items()}
def _check_missing_entries(entries: list) -> None:
"""Every missing entry carries a non-empty message + a non-empty docs link
(exact link not asserted — links are subject to change)."""
for e in entries:
assert e["message"], f"empty message: {e!r}"
assert e["documentationLink"], f"empty doc link: {e!r}"
# Parametrize cases derived from SPECS.
_DEFAULT_CASES = [ # one representative dropped default metric per type
pytest.param(t, comp, ms[0], id=f"{t}-{ms[0]}") for t in ALL_TYPES for comp, ms in [next(iter(SPECS[t]["default"].items()))]
]
_OPTIONAL_CASES = [ # types that have optional metrics
pytest.param(t, comp, ms[0], id=f"{t}-{ms[0]}") for t in ALL_TYPES for comp, ms in SPECS[t]["optional"].items() if ms
]
_ATTR_CASES = [pytest.param(t, comp, a, id=f"{t}-{a}") for t in ALL_TYPES for comp, attrs in SPECS[t]["attrs"].items() for a in attrs]
@pytest.mark.parametrize(
"type_,err_substr",
[
pytest.param(None, "type is required", id="missing_type"),
pytest.param("foo", "invalid type", id="invalid_type"),
],
)
def test_onboarding_validation_errors(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
type_,
err_substr: str,
) -> None:
"""Missing/unknown `type` query param → 400 invalid_input."""
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
response = _request(signoz, token, type_)
assert response.status_code == HTTPStatus.BAD_REQUEST, response.text
error = response.json()["error"]
assert error["code"] == "invalid_input"
@pytest.mark.parametrize("type_", ALL_TYPES)
def test_onboarding_empty_backend(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics, # noqa: ARG001 ensures metadata is truncated around this test
type_: str,
) -> None:
"""No data ingested → not ready; every default metric + required attr reported
missing (bucketed by component, with message + docs link); present lists empty."""
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
data = _request(signoz, token, type_).json()["data"]
assert data["ready"] is False
assert data["presentDefaultEnabledMetrics"] == []
assert data["presentOptionalMetrics"] == []
assert data["presentRequiredAttributes"] == []
assert _grouped(data["missingDefaultEnabledMetrics"], "metrics") == _exp(SPECS[type_]["default"])
assert _grouped(data["missingOptionalMetrics"], "metrics") == _exp(SPECS[type_]["optional"])
assert _grouped(data["missingRequiredAttributes"], "attributes") == _exp(SPECS[type_]["attrs"])
_check_missing_entries(data["missingDefaultEnabledMetrics"])
_check_missing_entries(data["missingOptionalMetrics"])
_check_missing_entries(data["missingRequiredAttributes"])
@pytest.mark.parametrize("type_", ALL_TYPES)
def test_onboarding_all_present_ready(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics,
type_: str,
) -> None:
"""Every default+optional metric seeded carrying all required attrs → ready;
present buckets exactly match the spec, all missing lists empty."""
_seed(insert_metrics, _all_metrics(type_), _attr_labels(type_))
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
data = _request(signoz, token, type_).json()["data"]
assert data["type"] == type_
assert data["ready"] is True
assert data["missingDefaultEnabledMetrics"] == []
assert data["missingOptionalMetrics"] == []
assert data["missingRequiredAttributes"] == []
assert _grouped(data["presentDefaultEnabledMetrics"], "metrics") == _exp(SPECS[type_]["default"])
assert _grouped(data["presentOptionalMetrics"], "metrics") == _exp(SPECS[type_]["optional"])
assert _grouped(data["presentRequiredAttributes"], "attributes") == _exp(SPECS[type_]["attrs"])
@pytest.mark.parametrize("type_,component,metric", _DEFAULT_CASES)
def test_onboarding_missing_default_metric(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics,
type_: str,
component: str,
metric: str,
) -> None:
"""One default metric never ingested (everything else present) → that metric is
in missingDefaultEnabledMetrics under its component; not ready."""
seed = [m for m in _all_metrics(type_) if m != metric]
_seed(insert_metrics, seed, _attr_labels(type_))
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
data = _request(signoz, token, type_).json()["data"]
assert data["ready"] is False
assert metric in _grouped(data["missingDefaultEnabledMetrics"], "metrics").get(component, set())
assert data["missingOptionalMetrics"] == []
assert data["missingRequiredAttributes"] == []
_check_missing_entries(data["missingDefaultEnabledMetrics"])
@pytest.mark.parametrize("type_,component,metric", _OPTIONAL_CASES)
def test_onboarding_missing_optional_metric(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics,
type_: str,
component: str,
metric: str,
) -> None:
"""One optional metric missing → reported in missingOptionalMetrics and (locked
decision) NOT ready, even though all default metrics + attrs are present."""
seed = [m for m in _all_metrics(type_) if m != metric]
_seed(insert_metrics, seed, _attr_labels(type_))
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
data = _request(signoz, token, type_).json()["data"]
assert data["ready"] is False
assert metric in _grouped(data["missingOptionalMetrics"], "metrics").get(component, set())
assert data["missingDefaultEnabledMetrics"] == []
assert data["missingRequiredAttributes"] == []
_check_missing_entries(data["missingOptionalMetrics"])
@pytest.mark.parametrize("type_,component,attr", _ATTR_CASES)
def test_onboarding_missing_required_attribute(
signoz: types.SigNoz,
create_user_admin: None, # pylint: disable=unused-argument
get_token,
insert_metrics,
type_: str,
component: str,
attr: str,
) -> None:
"""All metrics present but one required attr never seen on any of them → that
attr is in missingRequiredAttributes under its component; not ready."""
_seed(insert_metrics, _all_metrics(type_), _attr_labels(type_, drop=attr))
token = get_token(USER_ADMIN_EMAIL, USER_ADMIN_PASSWORD)
data = _request(signoz, token, type_).json()["data"]
assert data["ready"] is False
assert attr in _grouped(data["missingRequiredAttributes"], "attributes").get(component, set())
assert data["missingDefaultEnabledMetrics"] == []
assert data["missingOptionalMetrics"] == []
_check_missing_entries(data["missingRequiredAttributes"])

View File

@@ -2306,9 +2306,11 @@ def test_logs_list_filter_by_trace_id(
"""
Tests that filtering logs by trace_id uses the trace_summary lookup to
narrow the query window before scanning the logs table:
1. Returns the matching log (narrow window, single bucket).
1. Returns the matching logs (narrow window, single bucket), including a log
flushed shortly after the span ends — kept by the configured padding.
2. Does not return duplicate logs when the query window should span multiple
exponential buckets (>1 h). But is clamped to the timerange of trace.
exponential buckets (>1 h). The window is clamped to the trace's recorded
range widened by the padding, so the post-span log survives the clamp.
3. Returns no results when the query window does not contain the trace.
4. Logs carrying a trace_id whose trace is NOT in trace_summary (e.g.
traces disabled) are still returned — the lookup miss must not
@@ -2366,6 +2368,9 @@ def test_logs_list_filter_by_trace_id(
# Insert logs:
# - one with the target trace_id, at a timestamp within the trace's
# recorded window (now-10s..now-5s, padded ±1s).
# - one with the target trace_id flushed ~3s AFTER the span's recorded end
# (now-2s). This is outside the ±1s base pad but inside the multi-minute
# log_trace_id_window_padding, so it must still be returned.
# - one with an orphan trace_id whose trace was never ingested — used to
# verify the lookup miss does NOT short-circuit logs queries.
insert_logs(
@@ -2379,6 +2384,15 @@ def test_logs_list_filter_by_trace_id(
trace_id=target_trace_id,
span_id=target_root_span_id,
),
Logs(
timestamp=now - timedelta(seconds=2),
resources=common_resources,
attributes={"http.method": "POST"},
body="log flushed after the span ends, within padding window",
severity_text="INFO",
trace_id=target_trace_id,
span_id=target_root_span_id,
),
Logs(
timestamp=now - timedelta(seconds=2),
resources=common_resources,
@@ -2429,23 +2443,31 @@ def test_logs_list_filter_by_trace_id(
now_ms = int(now.timestamp() * 1000)
inside_window_body = "log inside the target trace window"
post_span_body = "log flushed after the span ends, within padding window"
# --- Test 1: narrow window (single bucket, <1 h) ---
narrow_start_ms = int((now - timedelta(minutes=5)).timestamp() * 1000)
narrow_rows, narrow_warnings = _query(narrow_start_ms, now_ms, target_trace_id)
assert len(narrow_rows) == 1, f"Expected 1 log for trace_id filter (narrow window), got {len(narrow_rows)}"
assert narrow_rows[0]["data"]["trace_id"] == target_trace_id
assert narrow_rows[0]["data"]["span_id"] == target_root_span_id
assert len(narrow_rows) == 2, f"Expected 2 logs for trace_id filter (narrow window), got {len(narrow_rows)}"
assert {r["data"]["trace_id"] for r in narrow_rows} == {target_trace_id}
narrow_bodies = {r["data"]["body"] for r in narrow_rows}
assert inside_window_body in narrow_bodies
assert post_span_body in narrow_bodies, "post-span log should be returned within the padding window"
assert not any(outside_range_msg in m for m in narrow_warnings), f"Did not expect outside-range warning, got {narrow_warnings}"
# --- Test 2: wide window (>1 h, clamp to the timerange from trace_summary) ---
# Should still return exactly one log — no duplicates from multi-bucket scan.
# --- Test 2: wide window (>1 h, clamp to the padded timerange from trace_summary) ---
# Should return exactly the two target logs — no duplicates from multi-bucket
# scan, and the post-span log survives the clamp only because of the padding.
wide_start_ms = int((now - timedelta(hours=12)).timestamp() * 1000)
wide_rows, wide_warnings = _query(wide_start_ms, now_ms, target_trace_id)
assert len(wide_rows) == 1, f"Expected 1 log for trace_id filter (wide window, multi-bucket), got {len(wide_rows)} — possible duplicate-log regression"
assert wide_rows[0]["data"]["trace_id"] == target_trace_id
assert wide_rows[0]["data"]["span_id"] == target_root_span_id
assert len(wide_rows) == 2, f"Expected 2 logs for trace_id filter (wide window, multi-bucket), got {len(wide_rows)} — possible duplicate-log regression or padding not applied"
assert {r["data"]["trace_id"] for r in wide_rows} == {target_trace_id}
wide_bodies = {r["data"]["body"] for r in wide_rows}
assert inside_window_body in wide_bodies
assert post_span_body in wide_bodies, "post-span log should survive the clamp because of the padding"
assert not any(outside_range_msg in m for m in wide_warnings), f"Did not expect outside-range warning, got {wide_warnings}"
# --- Test 3: window that does not contain the trace returns no results + warning ---

View File

@@ -15,7 +15,6 @@ from fixtures.querier import (
build_builder_query,
find_named_result,
get_all_warnings,
get_error_message,
index_series_by_label,
make_query_request,
)