mirror of
https://github.com/SigNoz/signoz.git
synced 2026-04-29 15:10:28 +01:00
* chore: baseline setup * chore: endpoint detail update * chore: added logic for hosts v3 api * fix: bug fix * chore: disk usage * chore: added validate function * chore: added some unit tests * chore: return status as a string * chore: yarn generate api * chore: removed isSendingK8sAgentsMetricsCode * chore: moved funcs * chore: added validation on order by * chore: updated spec * chore: nil pointer dereference fix in req.Filter * chore: added temporalities of metrics * chore: unified composite key function * chore: code improvements * chore: hostStatusNone added for clarity that this field can be left empty as well in payload * chore: yarn generate api * chore: return errors from getMetadata and lint fix * chore: return errors from getMetadata and lint fix * chore: added hostName logic * chore: modified getMetadata query * chore: add type for response and files rearrange * chore: warnings added passing from queryResponse warning to host lists response struct * chore: added better metrics existence check * chore: added a TODO remark * chore: added required metrics check * chore: distributed samples table to local table change for get metadata * chore: frontend fix * chore: endpoint correction * chore: endpoint modification openapi * chore: escape backtick to prevent sql injection * chore: rearrage * chore: improvements * chore: validate order by to validate function * chore: improved description * chore: added TODOs and made filterByStatus a part of filter struct * chore: ignore empty string hosts in get active hosts * feat(infra-monitoring): v2 hosts list - return counts of active & inactive hosts for custom group by attributes (#10956) * chore: add functionality for showing active and inactive counts in custom group by * chore: bug fix * chore: added subquery for active and total count * chore: ignore empty string hosts in get active hosts * fix: sinceUnixMilli for determining active hosts compute once per request * chore: refactor code * chore: rename HostsList -> ListHosts * chore: rearrangement * chore: inframonitoring types renaming * chore: added types package * chore: file structure further breakdown for clarity * chore: comments correction * chore: removed temporalities * chore: comments resolve * chore: added json tag required: true * chore: added status unauthorized * chore: remove a defensive nil map check, the function ensure non-nil map when err nil * chore: make sort stable in case of tiebreaker by comparing composite group by keys * chore: regen api client for inframonitoring Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
162 lines
5.6 KiB
Go
162 lines
5.6 KiB
Go
package implinframonitoring
|
|
|
|
import (
|
|
"context"
|
|
"log/slog"
|
|
"time"
|
|
|
|
"github.com/SigNoz/signoz/pkg/factory"
|
|
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
|
|
"github.com/SigNoz/signoz/pkg/querier"
|
|
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
|
|
"github.com/SigNoz/signoz/pkg/telemetrystore"
|
|
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
|
|
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
|
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
|
|
"github.com/SigNoz/signoz/pkg/valuer"
|
|
)
|
|
|
|
type module struct {
|
|
telemetryStore telemetrystore.TelemetryStore
|
|
telemetryMetadataStore telemetrytypes.MetadataStore
|
|
querier querier.Querier
|
|
fieldMapper qbtypes.FieldMapper
|
|
condBuilder qbtypes.ConditionBuilder
|
|
logger *slog.Logger
|
|
config inframonitoring.Config
|
|
}
|
|
|
|
// NewModule constructs the inframonitoring module with the provided dependencies.
|
|
func NewModule(
|
|
telemetryStore telemetrystore.TelemetryStore,
|
|
telemetryMetadataStore telemetrytypes.MetadataStore,
|
|
querier querier.Querier,
|
|
providerSettings factory.ProviderSettings,
|
|
cfg inframonitoring.Config,
|
|
) inframonitoring.Module {
|
|
fieldMapper := telemetrymetrics.NewFieldMapper()
|
|
condBuilder := telemetrymetrics.NewConditionBuilder(fieldMapper)
|
|
return &module{
|
|
telemetryStore: telemetryStore,
|
|
telemetryMetadataStore: telemetryMetadataStore,
|
|
querier: querier,
|
|
fieldMapper: fieldMapper,
|
|
condBuilder: condBuilder,
|
|
logger: providerSettings.Logger,
|
|
config: cfg,
|
|
}
|
|
}
|
|
|
|
func (m *module) ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error) {
|
|
if err := req.Validate(); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp := &inframonitoringtypes.Hosts{}
|
|
|
|
// default to cpu order by
|
|
if req.OrderBy == nil {
|
|
req.OrderBy = &qbtypes.OrderBy{
|
|
Key: qbtypes.OrderByKey{
|
|
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
|
|
Name: "cpu",
|
|
},
|
|
},
|
|
Direction: qbtypes.OrderDirectionDesc,
|
|
}
|
|
}
|
|
|
|
// default to host name group by
|
|
if len(req.GroupBy) == 0 {
|
|
req.GroupBy = []qbtypes.GroupByKey{hostNameGroupByKey}
|
|
resp.Type = inframonitoringtypes.ResponseTypeList
|
|
} else {
|
|
resp.Type = inframonitoringtypes.ResponseTypeGroupedList
|
|
}
|
|
|
|
// 1. Check which required metrics exist and get earliest retention time.
|
|
// If any required metric is missing, return early with the list of missing metrics.
|
|
// 2. If metrics exist but req.End is before the earliest reported time, return early with endTimeBeforeRetention=true.
|
|
missingMetrics, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, hostsTableMetricNamesList)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if len(missingMetrics) > 0 {
|
|
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: missingMetrics}
|
|
resp.Records = []inframonitoringtypes.HostRecord{}
|
|
resp.Total = 0
|
|
return resp, nil
|
|
}
|
|
if req.End < int64(minFirstReportedUnixMilli) {
|
|
resp.EndTimeBeforeRetention = true
|
|
resp.Records = []inframonitoringtypes.HostRecord{}
|
|
resp.Total = 0
|
|
return resp, nil
|
|
}
|
|
resp.RequiredMetricsCheck = inframonitoringtypes.RequiredMetricsCheck{MissingMetrics: []string{}}
|
|
|
|
// TOD(nikhilmantri0902): replace this separate ClickHouse query with a sub-query inside the main query builder query
|
|
// once QB supports sub-queries.
|
|
// Determine active hosts: those with metrics reported in the last 10 minutes.
|
|
// Compute the cutoff once so every downstream query/subquery agrees on what "active" means.
|
|
sinceUnixMilli := time.Now().Add(-10 * time.Minute).UTC().UnixMilli()
|
|
activeHostsMap, err := m.getActiveHosts(ctx, hostsTableMetricNamesList, hostNameAttrKey, sinceUnixMilli)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// this check below modifies req.Filter by adding `AND active hosts filter` if req.FilterByStatus is set.
|
|
if m.applyHostsActiveStatusFilter(req, activeHostsMap) {
|
|
resp.Records = []inframonitoringtypes.HostRecord{}
|
|
resp.Total = 0
|
|
return resp, nil
|
|
}
|
|
|
|
metadataMap, err := m.getHostsTableMetadata(ctx, req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
resp.Total = len(metadataMap)
|
|
|
|
pageGroups, err := m.getTopHostGroups(ctx, orgID, req, metadataMap)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(pageGroups) == 0 {
|
|
resp.Records = []inframonitoringtypes.HostRecord{}
|
|
return resp, nil
|
|
}
|
|
|
|
hostsFilterExpr := ""
|
|
if req.Filter != nil {
|
|
hostsFilterExpr = req.Filter.Expression
|
|
}
|
|
|
|
fullQueryReq := buildFullQueryRequest(req.Start, req.End, hostsFilterExpr, req.GroupBy, pageGroups, m.newListHostsQuery())
|
|
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Compute per-group active/inactive host counts.
|
|
// When host.name is in groupBy, each row = one host, so counts are derived
|
|
// directly from activeHostsMap in buildHostRecords (no extra query needed).
|
|
// When host.name is not in groupBy, we need to run an additional query to get the counts per group for the current page,
|
|
// using the same filter expression as the main query (including user filters + page groups IN clause).
|
|
hostCounts := make(map[string]groupHostStatusCounts)
|
|
isHostNameInGroupBy := isKeyInGroupByAttrs(req.GroupBy, hostNameAttrKey)
|
|
if !isHostNameInGroupBy {
|
|
hostCounts, err = m.getPerGroupHostStatusCounts(ctx, req, hostsTableMetricNamesList, pageGroups, sinceUnixMilli)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
resp.Records = buildHostRecords(isHostNameInGroupBy, queryResp, pageGroups, req.GroupBy, metadataMap, activeHostsMap, hostCounts)
|
|
resp.Warning = queryResp.Warning
|
|
|
|
return resp, nil
|
|
}
|