Compare commits

...

12 Commits

Author SHA1 Message Date
nikhilmantri0902
aecfa1a174 chore: added validation on order by 2026-04-02 20:13:30 +05:30
nikhilmantri0902
b869d23d94 chore: moved funcs 2026-04-02 20:02:22 +05:30
nikhilmantri0902
6ee3d44f76 chore: removed isSendingK8sAgentsMetricsCode 2026-04-02 19:58:30 +05:30
nikhilmantri0902
66afa73e6f chore: return status as a string 2026-04-02 14:39:02 +05:30
nikhilmantri0902
54c604bcf4 chore: added some unit tests 2026-04-02 14:20:27 +05:30
nikhilmantri0902
c1be02ba54 chore: added validate function 2026-04-02 14:14:34 +05:30
nikhilmantri0902
d3c7ba8f45 chore: disk usage 2026-04-02 14:01:18 +05:30
nikhilmantri0902
039c4a0496 fix: bug fix 2026-04-02 11:32:49 +05:30
nikhilmantri0902
51a94b6bbc chore: added logic for hosts v3 api 2026-04-02 02:52:28 +05:30
nikhilmantri0902
bbfbb94f52 chore: merged main 2026-04-01 00:45:40 +05:30
nikhilmantri0902
d1eb9ef16f chore: endpoint detail update 2026-03-31 16:16:31 +05:30
nikhilmantri0902
3db00f8bc3 chore: baseline setup 2026-03-31 15:27:18 +05:30
18 changed files with 1925 additions and 0 deletions

View File

@@ -1114,6 +1114,69 @@ components:
enabled:
type: boolean
type: object
InframonitoringtypesHostRecord:
properties:
active:
type: boolean
cpu:
format: double
type: number
hostName:
type: string
load15:
format: double
type: number
memory:
format: double
type: number
meta:
additionalProperties: {}
nullable: true
type: object
wait:
format: double
type: number
type: object
InframonitoringtypesHostsListRequest:
properties:
end:
format: int64
type: integer
filter:
$ref: '#/components/schemas/Querybuildertypesv5Filter'
groupBy:
items:
$ref: '#/components/schemas/Querybuildertypesv5GroupByKey'
nullable: true
type: array
limit:
type: integer
offset:
type: integer
orderBy:
$ref: '#/components/schemas/Querybuildertypesv5OrderBy'
start:
format: int64
type: integer
type: object
InframonitoringtypesHostsListResponse:
properties:
endTimeBeforeRetention:
type: boolean
isSendingK8SAgentMetrics:
type: boolean
records:
items:
$ref: '#/components/schemas/InframonitoringtypesHostRecord'
nullable: true
type: array
sentAnyMetricsData:
type: boolean
total:
type: integer
type:
type: string
type: object
MetricsexplorertypesInspectMetricsRequest:
properties:
end:
@@ -7291,6 +7354,64 @@ paths:
summary: Health check
tags:
- health
/api/v2/infra-monitoring/hosts/list:
post:
deprecated: false
description: This endpoint returns a list of hosts along with other information
for each of them
operationId: HostsList
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/InframonitoringtypesHostsListRequest'
responses:
"200":
content:
application/json:
schema:
properties:
data:
$ref: '#/components/schemas/InframonitoringtypesHostsListResponse'
status:
type: string
required:
- status
- data
type: object
description: OK
"400":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Bad Request
"401":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Unauthorized
"403":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Forbidden
"500":
content:
application/json:
schema:
$ref: '#/components/schemas/RenderErrorResponse'
description: Internal Server Error
security:
- api_key:
- VIEWER
- tokenizer:
- VIEWER
summary: List Hosts for Infra Monitoring
tags:
- infra-monitoring
/api/v2/livez:
get:
deprecated: false

View File

@@ -0,0 +1,33 @@
package signozapiserver
import (
"net/http"
"github.com/SigNoz/signoz/pkg/http/handler"
"github.com/SigNoz/signoz/pkg/types"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/gorilla/mux"
)
func (provider *provider) addInfraMonitoringRoutes(router *mux.Router) error {
if err := router.Handle("/api/v2/infra-monitoring/hosts/list", handler.New(
provider.authZ.ViewAccess(provider.infraMonitoringHandler.HostsList),
handler.OpenAPIDef{
ID: "HostsList",
Tags: []string{"infra-monitoring"},
Summary: "List Hosts for Infra Monitoring",
Description: "This endpoint returns a list of hosts along with other information for each of them",
Request: new(inframonitoringtypes.HostsListRequest),
RequestContentType: "application/json",
Response: new(inframonitoringtypes.HostsListResponse),
ResponseContentType: "application/json",
SuccessStatusCode: http.StatusOK,
ErrorStatusCodes: []int{http.StatusBadRequest, http.StatusUnauthorized, http.StatusInternalServerError},
Deprecated: false,
SecuritySchemes: newSecuritySchemes(types.RoleViewer),
})).Methods(http.MethodPost).GetError(); err != nil {
return err
}
return nil
}

View File

@@ -16,6 +16,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/dashboard"
"github.com/SigNoz/signoz/pkg/modules/fields"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/modules/preference"
"github.com/SigNoz/signoz/pkg/modules/promote"
@@ -47,6 +48,7 @@ type provider struct {
dashboardModule dashboard.Module
dashboardHandler dashboard.Handler
metricsExplorerHandler metricsexplorer.Handler
infraMonitoringHandler inframonitoring.Handler
gatewayHandler gateway.Handler
fieldsHandler fields.Handler
authzHandler authz.Handler
@@ -73,6 +75,7 @@ func NewFactory(
dashboardModule dashboard.Module,
dashboardHandler dashboard.Handler,
metricsExplorerHandler metricsexplorer.Handler,
infraMonitoringHandler inframonitoring.Handler,
gatewayHandler gateway.Handler,
fieldsHandler fields.Handler,
authzHandler authz.Handler,
@@ -102,6 +105,7 @@ func NewFactory(
dashboardModule,
dashboardHandler,
metricsExplorerHandler,
infraMonitoringHandler,
gatewayHandler,
fieldsHandler,
authzHandler,
@@ -133,6 +137,7 @@ func newProvider(
dashboardModule dashboard.Module,
dashboardHandler dashboard.Handler,
metricsExplorerHandler metricsexplorer.Handler,
infraMonitoringHandler inframonitoring.Handler,
gatewayHandler gateway.Handler,
fieldsHandler fields.Handler,
authzHandler authz.Handler,
@@ -162,6 +167,7 @@ func newProvider(
dashboardModule: dashboardModule,
dashboardHandler: dashboardHandler,
metricsExplorerHandler: metricsExplorerHandler,
infraMonitoringHandler: infraMonitoringHandler,
gatewayHandler: gatewayHandler,
fieldsHandler: fieldsHandler,
authzHandler: authzHandler,
@@ -228,6 +234,10 @@ func (provider *provider) AddToRouter(router *mux.Router) error {
return err
}
if err := provider.addInfraMonitoringRoutes(router); err != nil {
return err
}
if err := provider.addGatewayRoutes(router); err != nil {
return err
}

View File

@@ -0,0 +1,33 @@
package inframonitoring
import (
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
)
type Config struct {
TelemetryStore TelemetryStoreConfig `mapstructure:"telemetrystore"`
}
type TelemetryStoreConfig struct {
Threads int `mapstructure:"threads"`
}
func NewConfigFactory() factory.ConfigFactory {
return factory.NewConfigFactory(factory.MustNewName("inframonitoring"), newConfig)
}
func newConfig() factory.Config {
return Config{
TelemetryStore: TelemetryStoreConfig{
Threads: 8,
},
}
}
func (c Config) Validate() error {
if c.TelemetryStore.Threads <= 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "inframonitoring.telemetrystore.threads must be positive, got %d", c.TelemetryStore.Threads)
}
return nil
}

View File

@@ -0,0 +1,233 @@
package implinframonitoring
import (
"context"
"fmt"
"strings"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/querybuilder"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/huandu/go-sqlbuilder"
)
const (
ResponseTypeList = "list"
ResponseTypeGroupedList = "grouped_list"
)
func (m *module) buildFilterClause(ctx context.Context, filter *qbtypes.Filter, startMillis, endMillis int64) (*sqlbuilder.WhereClause, error) {
expression := ""
if filter != nil {
expression = strings.TrimSpace(filter.Expression)
}
if expression == "" {
return sqlbuilder.NewWhereClause(), nil
}
whereClauseSelectors := querybuilder.QueryStringToKeysSelectors(expression)
for idx := range whereClauseSelectors {
whereClauseSelectors[idx].Signal = telemetrytypes.SignalMetrics
whereClauseSelectors[idx].SelectorMatchType = telemetrytypes.FieldSelectorMatchTypeExact
}
keys, _, err := m.telemetryMetadataStore.GetKeysMulti(ctx, whereClauseSelectors)
if err != nil {
return nil, err
}
opts := querybuilder.FilterExprVisitorOpts{
Context: ctx,
Logger: m.logger,
FieldMapper: m.fieldMapper,
ConditionBuilder: m.condBuilder,
FullTextColumn: &telemetrytypes.TelemetryFieldKey{Name: "metric_name", FieldContext: telemetrytypes.FieldContextMetric},
FieldKeys: keys,
StartNs: querybuilder.ToNanoSecs(uint64(startMillis)),
EndNs: querybuilder.ToNanoSecs(uint64(endMillis)),
}
whereClause, err := querybuilder.PrepareWhereClause(expression, opts)
if err != nil {
return nil, err
}
if whereClause == nil || whereClause.WhereClause == nil {
return sqlbuilder.NewWhereClause(), nil
}
return whereClause.WhereClause, nil
}
// getMetricsExistenceAndEarliestTime checks whether any of the given metric names
// have been reported, and returns the total count and the earliest first-reported timestamp.
// When count is 0, minFirstReportedUnixMilli is 0.
func (m *module) getMetricsExistenceAndEarliestTime(ctx context.Context, metricNames []string) (uint64, uint64, error) {
if len(metricNames) == 0 {
return 0, 0, nil
}
sb := sqlbuilder.NewSelectBuilder()
sb.Select("count(*) AS cnt", "min(first_reported_unix_milli) AS min_first_reported")
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
sb.Where(sb.In("metric_name", sqlbuilder.List(metricNames)))
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
var count, minFirstReported uint64
err := m.telemetryStore.ClickhouseDB().QueryRow(ctx, query, args...).Scan(&count, &minFirstReported)
if err != nil {
return 0, 0, err
}
return count, minFirstReported, nil
}
// getMetadata fetches the latest values of additionalCols for each unique combination of groupBy keys,
// within the given time range and metric names. It uses argMax(tuple(...), unix_milli) to ensure
// we always pick attribute values from the latest timestamp for each group.
//
// The returned map has a composite key of groupBy column values joined by "\x00" (null byte),
// mapping to a flat map of col_name -> col_value (includes both groupBy and additional cols).
func (m *module) getMetadata(
ctx context.Context,
metricNames []string,
groupBy []qbtypes.GroupByKey,
additionalCols []string,
filter *qbtypes.Filter,
startMs, endMs int64,
) (map[string]map[string]string, error) {
if len(metricNames) == 0 || len(groupBy) == 0 {
return nil, nil
}
// Pick the optimal timeseries table based on time range; also get adjusted start.
adjustedStart, adjustedEnd, distributedTableName, _ := telemetrymetrics.WhichTSTableToUse(
uint64(startMs), uint64(endMs), nil,
)
// Flatten groupBy keys to string names for SQL expressions and result scanning.
groupByCols := make([]string, len(groupBy))
for i, key := range groupBy {
groupByCols[i] = key.Name
}
allCols := append(groupByCols, additionalCols...)
// --- Build inner query ---
// Inner SELECT columns: JSONExtractString for each groupBy col + argMax(tuple(...)) for additional cols
innerSelectCols := make([]string, 0, len(groupByCols)+1)
for _, col := range groupByCols {
innerSelectCols = append(innerSelectCols,
fmt.Sprintf("JSONExtractString(labels, '%s') AS `%s`", col, col),
)
}
// Build the argMax(tuple(...), unix_milli) expression for all additional cols
if len(additionalCols) > 0 {
tupleArgs := make([]string, 0, len(additionalCols))
for _, col := range additionalCols {
tupleArgs = append(tupleArgs, fmt.Sprintf("JSONExtractString(labels, '%s')", col))
}
innerSelectCols = append(innerSelectCols,
fmt.Sprintf("argMax(tuple(%s), unix_milli) AS latest_attrs", strings.Join(tupleArgs, ", ")),
)
}
innerSB := sqlbuilder.NewSelectBuilder()
innerSB.Select(innerSelectCols...)
innerSB.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, distributedTableName))
innerSB.Where(
innerSB.In("metric_name", sqlbuilder.List(metricNames)),
innerSB.GE("unix_milli", adjustedStart),
innerSB.L("unix_milli", adjustedEnd),
)
// Apply optional filter expression
if filter != nil && strings.TrimSpace(filter.Expression) != "" {
filterClause, err := m.buildFilterClause(ctx, filter, startMs, endMs)
if err != nil {
return nil, err
}
if filterClause != nil {
innerSB.AddWhereClause(sqlbuilder.CopyWhereClause(filterClause))
}
}
groupByAliases := make([]string, 0, len(groupByCols))
for _, col := range groupByCols {
groupByAliases = append(groupByAliases, fmt.Sprintf("`%s`", col))
}
innerSB.GroupBy(groupByAliases...)
innerQuery, innerArgs := innerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
// --- Build outer query ---
// Outer SELECT columns: groupBy cols directly + tupleElement(latest_attrs, N) for each additionalCol
outerSelectCols := make([]string, 0, len(allCols))
for _, col := range groupByCols {
outerSelectCols = append(outerSelectCols, fmt.Sprintf("`%s`", col))
}
for i, col := range additionalCols {
outerSelectCols = append(outerSelectCols,
fmt.Sprintf("tupleElement(latest_attrs, %d) AS `%s`", i+1, col),
)
}
outerSB := sqlbuilder.NewSelectBuilder()
outerSB.Select(outerSelectCols...)
outerSB.From(fmt.Sprintf("(%s)", innerQuery))
outerQuery, _ := outerSB.BuildWithFlavor(sqlbuilder.ClickHouse)
// All ? params are in innerArgs; outer query introduces none of its own.
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, outerQuery, innerArgs...)
if err != nil {
return nil, err
}
defer rows.Close()
result := make(map[string]map[string]string)
for rows.Next() {
row := make([]string, len(allCols))
scanPtrs := make([]any, len(row))
for i := range row {
scanPtrs[i] = &row[i]
}
if err := rows.Scan(scanPtrs...); err != nil {
return nil, err
}
compositeKey := strings.Join(row[:len(groupByCols)], "\x00")
attrMap := make(map[string]string, len(allCols))
for i, col := range allCols {
attrMap[col] = row[i]
}
result[compositeKey] = attrMap
}
if err := rows.Err(); err != nil {
return nil, err
}
return result, nil
}
func (m *module) validateOrderBy(orderBy *qbtypes.OrderBy, orderByToQueryNamesMap map[string][]string) error {
if orderBy == nil {
return nil
}
if _, exists := orderByToQueryNamesMap[orderBy.Key.Name]; !exists {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by key: %s", orderBy.Key.Name)
}
if orderBy.Direction != qbtypes.OrderDirectionAsc && orderBy.Direction != qbtypes.OrderDirectionDesc {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid order by direction: %s", orderBy.Direction)
}
return nil
}

View File

@@ -0,0 +1,49 @@
package implinframonitoring
import (
"context"
"net/http"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/http/binding"
"github.com/SigNoz/signoz/pkg/http/render"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/types/authtypes"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
type handler struct {
module inframonitoring.Module
}
// NewHandler returns an inframonitoring.Handler implementation.
func NewHandler(m inframonitoring.Module) inframonitoring.Handler {
return &handler{
module: m,
}
}
func (h *handler) HostsList(rw http.ResponseWriter, req *http.Request) {
claims, err := authtypes.ClaimsFromContext(req.Context())
if err != nil {
render.Error(rw, err)
return
}
orgID := valuer.MustNewUUID(claims.OrgID)
var parsedReq inframonitoringtypes.HostsListRequest
if err := binding.JSON.BindBody(req.Body, &parsedReq); err != nil {
render.Error(rw, errors.WrapInvalidInputf(err, errors.CodeInvalidInput, "failed to parse request body"))
return
}
result, err := h.module.HostsList(context.Background(), orgID, &parsedReq)
if err != nil {
render.Error(rw, err)
return
}
render.Success(rw, http.StatusOK, result)
}

View File

@@ -0,0 +1,277 @@
package implinframonitoring
import (
"fmt"
"sort"
"strings"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
)
type rankedGroup struct {
labels map[string]string
value float64
}
func isKeyInGroupByAttrs(groupByAttrs []qbtypes.GroupByKey, key string) bool {
for _, groupBy := range groupByAttrs {
if groupBy.Name == key {
return true
}
}
return false
}
func mergeFilterExpressions(queryFilterExpr, reqFilterExpr string) string {
queryFilterExpr = strings.TrimSpace(queryFilterExpr)
reqFilterExpr = strings.TrimSpace(reqFilterExpr)
if queryFilterExpr == "" {
return reqFilterExpr
}
if reqFilterExpr == "" {
return queryFilterExpr
}
return fmt.Sprintf("(%s) AND (%s)", queryFilterExpr, reqFilterExpr)
}
func compositeKeyFromLabels(labels map[string]string, groupBy []qbtypes.GroupByKey) string {
parts := make([]string, len(groupBy))
for i, key := range groupBy {
parts[i] = labels[key.Name]
}
return strings.Join(parts, "\x00")
}
// parseAndSortGroups extracts group label maps from a ScalarData response and
// sorts them by the ranking query's aggregation value.
func parseAndSortGroups(
resp *qbtypes.QueryRangeResponse,
rankingQueryName string,
groupBy []qbtypes.GroupByKey,
direction qbtypes.OrderDirection,
) []rankedGroup {
if resp == nil || len(resp.Data.Results) == 0 {
return nil
}
// Find the ScalarData that contains the ranking column.
var sd *qbtypes.ScalarData
for _, r := range resp.Data.Results {
candidate, ok := r.(*qbtypes.ScalarData)
if !ok || candidate == nil {
continue
}
for _, col := range candidate.Columns {
if col.Type == qbtypes.ColumnTypeAggregation && col.QueryName == rankingQueryName {
sd = candidate
break
}
}
if sd != nil {
break
}
}
if sd == nil || len(sd.Data) == 0 {
return nil
}
groupColIndices := make(map[string]int)
rankingColIdx := -1
for i, col := range sd.Columns {
if col.Type == qbtypes.ColumnTypeGroup {
groupColIndices[col.Name] = i
}
if col.Type == qbtypes.ColumnTypeAggregation && col.QueryName == rankingQueryName {
rankingColIdx = i
}
}
if rankingColIdx == -1 {
return nil
}
groups := make([]rankedGroup, 0, len(sd.Data))
for _, row := range sd.Data {
labels := make(map[string]string, len(groupBy))
for _, key := range groupBy {
if idx, ok := groupColIndices[key.Name]; ok && idx < len(row) {
labels[key.Name] = fmt.Sprintf("%v", row[idx])
}
}
var value float64
if rankingColIdx < len(row) {
if v, ok := row[rankingColIdx].(float64); ok {
value = v
}
}
groups = append(groups, rankedGroup{labels: labels, value: value})
}
sort.Slice(groups, func(i, j int) bool {
if direction == qbtypes.OrderDirectionAsc {
return groups[i].value < groups[j].value
}
return groups[i].value > groups[j].value
})
return groups
}
// paginateWithBackfill returns the page of groups for [offset, offset+limit).
// The virtual sorted list is: metric-ranked groups first, then metadata-only
// groups (those in metadataMap but not in metric results) sorted alphabetically.
func paginateWithBackfill(
metricGroups []rankedGroup,
metadataMap map[string]map[string]string,
groupBy []qbtypes.GroupByKey,
offset, limit int,
) []map[string]string {
metricKeySet := make(map[string]bool, len(metricGroups))
for _, g := range metricGroups {
metricKeySet[compositeKeyFromLabels(g.labels, groupBy)] = true
}
metadataOnlyKeys := make([]string, 0)
for compositeKey := range metadataMap {
if !metricKeySet[compositeKey] {
metadataOnlyKeys = append(metadataOnlyKeys, compositeKey)
}
}
sort.Strings(metadataOnlyKeys)
totalMetric := len(metricGroups)
totalAll := totalMetric + len(metadataOnlyKeys)
end := offset + limit
if end > totalAll {
end = totalAll
}
if offset >= totalAll {
return nil
}
pageGroups := make([]map[string]string, 0, end-offset)
for i := offset; i < end; i++ {
if i < totalMetric {
pageGroups = append(pageGroups, metricGroups[i].labels)
} else {
compositeKey := metadataOnlyKeys[i-totalMetric]
attrs := metadataMap[compositeKey]
labels := make(map[string]string, len(groupBy))
for _, key := range groupBy {
labels[key.Name] = attrs[key.Name]
}
pageGroups = append(pageGroups, labels)
}
}
return pageGroups
}
// buildFullQueryRequest creates a QueryRangeRequest for all metrics,
// restricted to the given page of groups via an IN filter.
// Accepts primitive fields so it can be reused across different v2 APIs
// (hosts, pods, etc.).
func buildFullQueryRequest(
start int64,
end int64,
filterExpr string,
groupBy []qbtypes.GroupByKey,
pageGroups []map[string]string,
tableListQuery *qbtypes.QueryRangeRequest,
) *qbtypes.QueryRangeRequest {
groupValues := make(map[string][]string)
for _, labels := range pageGroups {
for k, v := range labels {
groupValues[k] = append(groupValues[k], v)
}
}
inClauses := make([]string, 0, len(groupValues))
for key, values := range groupValues {
quoted := make([]string, len(values))
for i, v := range values {
quoted[i] = fmt.Sprintf("'%s'", v)
}
inClauses = append(inClauses, fmt.Sprintf("%s IN (%s)", key, strings.Join(quoted, ", ")))
}
inFilterExpr := strings.Join(inClauses, " AND ")
fullReq := &qbtypes.QueryRangeRequest{
Start: uint64(start),
End: uint64(end),
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0, len(tableListQuery.CompositeQuery.Queries)),
},
}
for _, envelope := range tableListQuery.CompositeQuery.Queries {
copied := envelope
if copied.Type == qbtypes.QueryTypeBuilder {
existingExpr := ""
if f := copied.GetFilter(); f != nil {
existingExpr = f.Expression
}
merged := mergeFilterExpressions(existingExpr, filterExpr)
merged = mergeFilterExpressions(merged, inFilterExpr)
copied.SetFilter(&qbtypes.Filter{Expression: merged})
copied.SetGroupBy(groupBy)
}
fullReq.CompositeQuery.Queries = append(fullReq.CompositeQuery.Queries, copied)
}
return fullReq
}
// parseFullQueryResponse extracts per-group metric values from the full
// composite query response. Returns compositeKey -> (queryName -> value).
// Each enabled query/formula produces its own ScalarData entry in Results,
// so we iterate over all of them and merge metrics per composite key.
func parseFullQueryResponse(
resp *qbtypes.QueryRangeResponse,
groupBy []qbtypes.GroupByKey,
) map[string]map[string]float64 {
result := make(map[string]map[string]float64)
if resp == nil || len(resp.Data.Results) == 0 {
return result
}
for _, r := range resp.Data.Results {
sd, ok := r.(*qbtypes.ScalarData)
if !ok || sd == nil {
continue
}
groupColIndices := make(map[string]int)
aggCols := make(map[int]string) // col index -> query name
for i, col := range sd.Columns {
if col.Type == qbtypes.ColumnTypeGroup {
groupColIndices[col.Name] = i
}
if col.Type == qbtypes.ColumnTypeAggregation {
aggCols[i] = col.QueryName
}
}
for _, row := range sd.Data {
labels := make(map[string]string, len(groupBy))
for _, key := range groupBy {
if idx, ok := groupColIndices[key.Name]; ok && idx < len(row) {
labels[key.Name] = fmt.Sprintf("%v", row[idx])
}
}
compositeKey := compositeKeyFromLabels(labels, groupBy)
if result[compositeKey] == nil {
result[compositeKey] = make(map[string]float64)
}
for idx, queryName := range aggCols {
if idx < len(row) {
if v, ok := row[idx].(float64); ok {
result[compositeKey][queryName] = v
}
}
}
}
}
return result
}

View File

@@ -0,0 +1,234 @@
package implinframonitoring
import (
"testing"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
)
func groupByKey(name string) qbtypes.GroupByKey {
return qbtypes.GroupByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{Name: name},
}
}
func TestIsKeyInGroupByAttrs(t *testing.T) {
tests := []struct {
name string
groupByAttrs []qbtypes.GroupByKey
key string
expectedFound bool
}{
{
name: "key present in single-element list",
groupByAttrs: []qbtypes.GroupByKey{groupByKey("host.name")},
key: "host.name",
expectedFound: true,
},
{
name: "key present in multi-element list",
groupByAttrs: []qbtypes.GroupByKey{
groupByKey("host.name"),
groupByKey("os.type"),
groupByKey("k8s.cluster.name"),
},
key: "os.type",
expectedFound: true,
},
{
name: "key at last position",
groupByAttrs: []qbtypes.GroupByKey{
groupByKey("host.name"),
groupByKey("os.type"),
},
key: "os.type",
expectedFound: true,
},
{
name: "key not in list",
groupByAttrs: []qbtypes.GroupByKey{groupByKey("host.name")},
key: "os.type",
expectedFound: false,
},
{
name: "empty group by list",
groupByAttrs: []qbtypes.GroupByKey{},
key: "host.name",
expectedFound: false,
},
{
name: "nil group by list",
groupByAttrs: nil,
key: "host.name",
expectedFound: false,
},
{
name: "empty key string",
groupByAttrs: []qbtypes.GroupByKey{groupByKey("host.name")},
key: "",
expectedFound: false,
},
{
name: "empty key matches empty-named group by key",
groupByAttrs: []qbtypes.GroupByKey{groupByKey("")},
key: "",
expectedFound: true,
},
{
name: "partial match does not count",
groupByAttrs: []qbtypes.GroupByKey{
groupByKey("host"),
},
key: "host.name",
expectedFound: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isKeyInGroupByAttrs(tt.groupByAttrs, tt.key)
if got != tt.expectedFound {
t.Errorf("isKeyInGroupByAttrs(%v, %q) = %v, want %v",
tt.groupByAttrs, tt.key, got, tt.expectedFound)
}
})
}
}
func TestMergeFilterExpressions(t *testing.T) {
tests := []struct {
name string
queryFilterExpr string
reqFilterExpr string
expected string
}{
{
name: "both non-empty",
queryFilterExpr: "cpu > 50",
reqFilterExpr: "host.name = 'web-1'",
expected: "(cpu > 50) AND (host.name = 'web-1')",
},
{
name: "query empty, req non-empty",
queryFilterExpr: "",
reqFilterExpr: "host.name = 'web-1'",
expected: "host.name = 'web-1'",
},
{
name: "query non-empty, req empty",
queryFilterExpr: "cpu > 50",
reqFilterExpr: "",
expected: "cpu > 50",
},
{
name: "both empty",
queryFilterExpr: "",
reqFilterExpr: "",
expected: "",
},
{
name: "whitespace-only query treated as empty",
queryFilterExpr: " ",
reqFilterExpr: "host.name = 'web-1'",
expected: "host.name = 'web-1'",
},
{
name: "whitespace-only req treated as empty",
queryFilterExpr: "cpu > 50",
reqFilterExpr: " ",
expected: "cpu > 50",
},
{
name: "both whitespace-only",
queryFilterExpr: " ",
reqFilterExpr: " ",
expected: "",
},
{
name: "leading/trailing whitespace trimmed before merge",
queryFilterExpr: " cpu > 50 ",
reqFilterExpr: " mem < 80 ",
expected: "(cpu > 50) AND (mem < 80)",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := mergeFilterExpressions(tt.queryFilterExpr, tt.reqFilterExpr)
if got != tt.expected {
t.Errorf("mergeFilterExpressions(%q, %q) = %q, want %q",
tt.queryFilterExpr, tt.reqFilterExpr, got, tt.expected)
}
})
}
}
func TestCompositeKeyFromLabels(t *testing.T) {
tests := []struct {
name string
labels map[string]string
groupBy []qbtypes.GroupByKey
expected string
}{
{
name: "single group-by key",
labels: map[string]string{"host.name": "web-1"},
groupBy: []qbtypes.GroupByKey{groupByKey("host.name")},
expected: "web-1",
},
{
name: "multiple group-by keys joined with null separator",
labels: map[string]string{
"host.name": "web-1",
"os.type": "linux",
},
groupBy: []qbtypes.GroupByKey{groupByKey("host.name"), groupByKey("os.type")},
expected: "web-1\x00linux",
},
{
name: "missing label yields empty segment",
labels: map[string]string{"host.name": "web-1"},
groupBy: []qbtypes.GroupByKey{groupByKey("host.name"), groupByKey("os.type")},
expected: "web-1\x00",
},
{
name: "empty labels map",
labels: map[string]string{},
groupBy: []qbtypes.GroupByKey{groupByKey("host.name")},
expected: "",
},
{
name: "empty group-by slice",
labels: map[string]string{"host.name": "web-1"},
groupBy: []qbtypes.GroupByKey{},
expected: "",
},
{
name: "nil labels map",
labels: nil,
groupBy: []qbtypes.GroupByKey{groupByKey("host.name")},
expected: "",
},
{
name: "order matches group-by order, not map iteration order",
labels: map[string]string{
"z": "last",
"a": "first",
"m": "middle",
},
groupBy: []qbtypes.GroupByKey{groupByKey("a"), groupByKey("m"), groupByKey("z")},
expected: "first\x00middle\x00last",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := compositeKeyFromLabels(tt.labels, tt.groupBy)
if got != tt.expected {
t.Errorf("compositeKeyFromLabels(%v, %v) = %q, want %q",
tt.labels, tt.groupBy, got, tt.expected)
}
})
}
}

View File

@@ -0,0 +1,472 @@
package implinframonitoring
import (
"context"
"fmt"
"slices"
"strings"
"time"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/types/metrictypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/huandu/go-sqlbuilder"
)
var (
hostNameAttrKey = "host.name"
)
// Helper group-by key used across all queries
var hostNameGroupByKey = qbtypes.GroupByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: hostNameAttrKey,
FieldContext: telemetrytypes.FieldContextResource,
FieldDataType: telemetrytypes.FieldDataTypeString,
},
}
var hostsTableMetricNamesList = []string{
"system.cpu.time",
"system.memory.usage",
"system.cpu.load_average.15m",
"system.filesystem.usage",
}
var hostAttrKeysForMetadata = []string{
"os.type",
}
// orderByToHostsQueryNames maps the orderBy column to the query/formula names
// from HostsTableListQuery used for ranking host groups.
var orderByToHostsQueryNames = map[string][]string{
"cpu": {"A", "B", "F1"},
"memory": {"C", "D", "F2"},
"wait": {"E", "F", "F3"},
"disk_usage": {"H", "I", "F4"},
"load15": {"G"},
}
func (m *module) newHostsTableListQuery() *qbtypes.QueryRangeRequest {
return &qbtypes.QueryRangeRequest{
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: []qbtypes.QueryEnvelope{
// Query A: CPU usage logic (non-idle)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "A",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.cpu.time",
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
Filter: &qbtypes.Filter{
Expression: "state != 'idle'",
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Query B: CPU usage (all states)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "B",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.cpu.time",
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Formula F1: CPU Usage (%)
{
Type: qbtypes.QueryTypeFormula,
Spec: qbtypes.QueryBuilderFormula{
Name: "F1",
Expression: "A/B",
Legend: "CPU Usage (%)",
Disabled: false,
},
},
// Query C: Memory usage (state = used)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "C",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.memory.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
Filter: &qbtypes.Filter{
Expression: "state = 'used'",
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Query D: Memory usage (all states)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "D",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.memory.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Formula F2: Memory Usage (%)
{
Type: qbtypes.QueryTypeFormula,
Spec: qbtypes.QueryBuilderFormula{
Name: "F2",
Expression: "C/D",
Legend: "Memory Usage (%)",
Disabled: false,
},
},
// Query E: CPU Wait time (state = wait)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "E",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.cpu.time",
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
Filter: &qbtypes.Filter{
Expression: "state = 'wait'",
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Query F: CPU time (all states)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "F",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.cpu.time",
TimeAggregation: metrictypes.TimeAggregationRate,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Formula F3: CPU Wait Time (%)
{
Type: qbtypes.QueryTypeFormula,
Spec: qbtypes.QueryBuilderFormula{
Name: "F3",
Expression: "E/F",
Legend: "CPU Wait Time (%)",
Disabled: false,
},
},
// Query G: Load15
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "G",
Signal: telemetrytypes.SignalMetrics,
Legend: "CPU Load Average (15m)",
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.cpu.load_average.15m",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: false,
},
},
// Query H: Filesystem Usage (state = used)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "H",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.filesystem.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
Filter: &qbtypes.Filter{
Expression: "state = 'used'",
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Query I: Filesystem Usage (all states)
{
Type: qbtypes.QueryTypeBuilder,
Spec: qbtypes.QueryBuilderQuery[qbtypes.MetricAggregation]{
Name: "I",
Signal: telemetrytypes.SignalMetrics,
Aggregations: []qbtypes.MetricAggregation{
{
MetricName: "system.filesystem.usage",
TimeAggregation: metrictypes.TimeAggregationAvg,
SpaceAggregation: metrictypes.SpaceAggregationSum,
ReduceTo: qbtypes.ReduceToAvg,
},
},
GroupBy: []qbtypes.GroupByKey{hostNameGroupByKey},
Disabled: true,
},
},
// Formula F4: Disk Usage (%)
{
Type: qbtypes.QueryTypeFormula,
Spec: qbtypes.QueryBuilderFormula{
Name: "F4",
Expression: "H/I",
Legend: "Disk Usage (%)",
Disabled: false,
},
},
},
},
}
}
// getTopHostGroups runs a ranking query for the ordering metric, sorts the
// results, paginates, and backfills from metadataMap when the page extends
// past the metric-ranked groups.
func (m *module) getTopHostGroups(
ctx context.Context,
orgID valuer.UUID,
req *inframonitoringtypes.HostsListRequest,
metadataMap map[string]map[string]string,
) ([]map[string]string, error) {
orderByKey := req.OrderBy.Key.Name
queryNamesForOrderBy := orderByToHostsQueryNames[orderByKey]
// The last entry is the formula/query whose value we sort by.
rankingQueryName := queryNamesForOrderBy[len(queryNamesForOrderBy)-1]
topReq := &qbtypes.QueryRangeRequest{
Start: uint64(req.Start),
End: uint64(req.End),
RequestType: qbtypes.RequestTypeScalar,
CompositeQuery: qbtypes.CompositeQuery{
Queries: make([]qbtypes.QueryEnvelope, 0, len(queryNamesForOrderBy)),
},
}
for _, envelope := range m.newHostsTableListQuery().CompositeQuery.Queries {
if !slices.Contains(queryNamesForOrderBy, envelope.GetQueryName()) {
continue
}
copied := envelope
if copied.Type == qbtypes.QueryTypeBuilder {
existingExpr := ""
if f := copied.GetFilter(); f != nil {
existingExpr = f.Expression
}
merged := mergeFilterExpressions(existingExpr, req.Filter.Expression)
copied.SetFilter(&qbtypes.Filter{Expression: merged})
copied.SetGroupBy(req.GroupBy)
}
topReq.CompositeQuery.Queries = append(topReq.CompositeQuery.Queries, copied)
}
resp, err := m.querier.QueryRange(ctx, orgID, topReq)
if err != nil {
return nil, err
}
allMetricGroups := parseAndSortGroups(resp, rankingQueryName, req.GroupBy, req.OrderBy.Direction)
return paginateWithBackfill(allMetricGroups, metadataMap, req.GroupBy, req.Offset, req.Limit), nil
}
// applyHostsActiveStatusFilter modifies req.Filter.Expression to include an IN/NOT IN
// clause based on FilterByStatus and the set of active hosts.
// Returns true if the caller should short-circuit with an empty result (ACTIVE
// requested but no hosts are active).
func (m *module) applyHostsActiveStatusFilter(req *inframonitoringtypes.HostsListRequest, activeHostsMap map[string]bool) (shouldShortCircuit bool) {
if req.FilterByStatus != inframonitoringtypes.HostStatusActive && req.FilterByStatus != inframonitoringtypes.HostStatusInactive {
return false
}
activeHosts := make([]string, 0, len(activeHostsMap))
for host := range activeHostsMap {
activeHosts = append(activeHosts, fmt.Sprintf("'%s'", host))
}
if len(activeHosts) == 0 {
return req.FilterByStatus == inframonitoringtypes.HostStatusActive
}
op := "IN"
if req.FilterByStatus == inframonitoringtypes.HostStatusInactive {
op = "NOT IN"
}
statusClause := fmt.Sprintf("%s %s (%s)", hostNameAttrKey, op, strings.Join(activeHosts, ", "))
req.Filter.Expression = fmt.Sprintf("(%s) AND (%s)", req.Filter.Expression, statusClause)
return false
}
func (m *module) getHostsTableMetadata(ctx context.Context, req *inframonitoringtypes.HostsListRequest) (map[string]map[string]string, error) {
var nonGroupByAttrs []string
for _, key := range hostAttrKeysForMetadata {
if !isKeyInGroupByAttrs(req.GroupBy, key) {
nonGroupByAttrs = append(nonGroupByAttrs, key)
}
}
metadataMap, err := m.getMetadata(ctx, hostsTableMetricNamesList, req.GroupBy, nonGroupByAttrs, req.Filter, req.Start, req.End)
if err != nil {
return nil, err
}
return metadataMap, nil
}
// buildHostRecords constructs the final list of HostRecords for a page.
// Groups that had no metric data get default values of -1.
func (m *module) buildHostRecords(
resp *qbtypes.QueryRangeResponse,
pageGroups []map[string]string,
groupBy []qbtypes.GroupByKey,
metadataMap map[string]map[string]string,
activeHostsMap map[string]bool,
) []inframonitoringtypes.HostRecord {
metricsMap := parseFullQueryResponse(resp, groupBy)
records := make([]inframonitoringtypes.HostRecord, 0, len(pageGroups))
for _, labels := range pageGroups {
compositeKey := compositeKeyFromLabels(labels, groupBy)
hostName := labels[hostNameAttrKey]
var activeStatus string
if isActive, known := activeHostsMap[hostName]; known {
if isActive {
activeStatus = inframonitoringtypes.HostStatusActive.StringValue()
} else {
activeStatus = inframonitoringtypes.HostStatusInactive.StringValue()
}
}
record := inframonitoringtypes.HostRecord{
HostName: hostName,
Status: activeStatus,
CPU: -1,
Memory: -1,
Wait: -1,
Load15: -1,
DiskUsage: -1,
Meta: map[string]interface{}{},
}
if metrics, ok := metricsMap[compositeKey]; ok {
if v, exists := metrics["F1"]; exists {
record.CPU = v
}
if v, exists := metrics["F2"]; exists {
record.Memory = v
}
if v, exists := metrics["F3"]; exists {
record.Wait = v
}
if v, exists := metrics["F4"]; exists {
record.DiskUsage = v
}
if v, exists := metrics["G"]; exists {
record.Load15 = v
}
}
if attrs, ok := metadataMap[compositeKey]; ok {
for k, v := range attrs {
record.Meta[k] = v
}
}
records = append(records, record)
}
return records
}
// getActiveHosts returns a set of host names that have reported metrics recently (since sinceUnixMilli).
// It queries distributed_metadata for hosts where last_reported_unix_milli >= sinceUnixMilli.
func (m *module) getActiveHosts(ctx context.Context, metricNames []string, hostNameAttr string) (map[string]bool, error) {
sinceUnixMilli := time.Now().Add(-10 * time.Minute).UTC().UnixMilli()
sb := sqlbuilder.NewSelectBuilder()
sb.Distinct()
sb.Select("attr_string_value")
sb.From(fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName))
sb.Where(
sb.In("metric_name", sqlbuilder.List(metricNames)),
sb.E("attr_name", hostNameAttr),
sb.GE("last_reported_unix_milli", sinceUnixMilli),
)
query, args := sb.BuildWithFlavor(sqlbuilder.ClickHouse)
rows, err := m.telemetryStore.ClickhouseDB().Query(ctx, query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
activeHosts := make(map[string]bool)
for rows.Next() {
var hostName string
if err := rows.Scan(&hostName); err != nil {
return nil, err
}
if hostName != "" {
activeHosts[hostName] = true
}
}
if err := rows.Err(); err != nil {
return nil, err
}
return activeHosts, nil
}

View File

@@ -0,0 +1,140 @@
package implinframonitoring
import (
"context"
"log/slog"
"github.com/SigNoz/signoz/pkg/factory"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/querier"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
"github.com/SigNoz/signoz/pkg/telemetrystore"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/types/telemetrytypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
type module struct {
telemetryStore telemetrystore.TelemetryStore
telemetryMetadataStore telemetrytypes.MetadataStore
querier querier.Querier
fieldMapper qbtypes.FieldMapper
condBuilder qbtypes.ConditionBuilder
logger *slog.Logger
config inframonitoring.Config
}
// NewModule constructs the inframonitoring module with the provided dependencies.
func NewModule(
telemetryStore telemetrystore.TelemetryStore,
telemetryMetadataStore telemetrytypes.MetadataStore,
querier querier.Querier,
providerSettings factory.ProviderSettings,
cfg inframonitoring.Config,
) inframonitoring.Module {
fieldMapper := telemetrymetrics.NewFieldMapper()
condBuilder := telemetrymetrics.NewConditionBuilder(fieldMapper)
return &module{
telemetryStore: telemetryStore,
telemetryMetadataStore: telemetryMetadataStore,
querier: querier,
fieldMapper: fieldMapper,
condBuilder: condBuilder,
logger: providerSettings.Logger,
config: cfg,
}
}
func (m *module) HostsList(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.HostsListRequest) (*inframonitoringtypes.HostsListResponse, error) {
if err := req.Validate(); err != nil {
return nil, err
}
resp := &inframonitoringtypes.HostsListResponse{}
// default to cpu order by
if req.OrderBy == nil {
req.OrderBy = &qbtypes.OrderBy{
Key: qbtypes.OrderByKey{
TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{
Name: "cpu",
},
},
Direction: qbtypes.OrderDirectionDesc,
}
}
if err := m.validateOrderBy(req.OrderBy, orderByToHostsQueryNames); err != nil {
return nil, err
}
// default to host name group by
if len(req.GroupBy) == 0 {
req.GroupBy = []qbtypes.GroupByKey{hostNameGroupByKey}
resp.Type = ResponseTypeList
} else {
resp.Type = ResponseTypeGroupedList
}
// 1. Check if any host metrics exist and get earliest retention time.
// If no host metrics exist, return early — the UI shows the onboarding guide.
// 2. If metrics exist but req.End is before the earliest reported time, convey retention boundary.
if count, minFirstReportedUnixMilli, err := m.getMetricsExistenceAndEarliestTime(ctx, hostsTableMetricNamesList); err == nil {
if count == 0 {
resp.SentAnyMetricsData = false
resp.Records = []inframonitoringtypes.HostRecord{}
resp.Total = 0
return resp, nil
}
resp.SentAnyMetricsData = true
if req.End < int64(minFirstReportedUnixMilli) {
resp.EndTimeBeforeRetention = true
resp.Records = []inframonitoringtypes.HostRecord{}
resp.Total = 0
return resp, nil
}
}
// Determine active hosts: those with metrics reported in the last 10 minutes.
activeHostsMap, err := m.getActiveHosts(ctx, hostsTableMetricNamesList, hostNameAttrKey)
if err != nil {
return nil, err
}
if m.applyHostsActiveStatusFilter(req, activeHostsMap) {
resp.Records = []inframonitoringtypes.HostRecord{}
resp.Total = 0
return resp, nil
}
metadataMap, err := m.getHostsTableMetadata(ctx, req)
if err != nil {
return nil, err
}
if metadataMap == nil {
metadataMap = make(map[string]map[string]string)
}
resp.Total = len(metadataMap)
pageGroups, err := m.getTopHostGroups(ctx, orgID, req, metadataMap)
if err != nil {
return nil, err
}
if len(pageGroups) == 0 {
resp.Records = []inframonitoringtypes.HostRecord{}
return resp, nil
}
fullQueryReq := buildFullQueryRequest(req.Start, req.End, req.Filter.Expression, req.GroupBy, pageGroups, m.newHostsTableListQuery())
queryResp, err := m.querier.QueryRange(ctx, orgID, fullQueryReq)
if err != nil {
return nil, err
}
resp.Records = m.buildHostRecords(queryResp, pageGroups, req.GroupBy, metadataMap, activeHostsMap)
return resp, nil
}

View File

@@ -0,0 +1,17 @@
package inframonitoring
import (
"context"
"net/http"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
type Handler interface {
HostsList(http.ResponseWriter, *http.Request)
}
type Module interface {
HostsList(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.HostsListRequest) (*inframonitoringtypes.HostsListResponse, error)
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/SigNoz/signoz/pkg/global"
"github.com/SigNoz/signoz/pkg/identn"
"github.com/SigNoz/signoz/pkg/instrumentation"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/user"
"github.com/SigNoz/signoz/pkg/pprof"
@@ -111,6 +112,9 @@ type Config struct {
// MetricsExplorer config
MetricsExplorer metricsexplorer.Config `mapstructure:"metricsexplorer"`
// InfraMonitoring config
InfraMonitoring inframonitoring.Config `mapstructure:"inframonitoring"`
// Flagger config
Flagger flagger.Config `mapstructure:"flagger"`
@@ -145,6 +149,7 @@ func NewConfig(ctx context.Context, logger *slog.Logger, resolverConfig config.R
gateway.NewConfigFactory(),
tokenizer.NewConfigFactory(),
metricsexplorer.NewConfigFactory(),
inframonitoring.NewConfigFactory(),
flagger.NewConfigFactory(),
user.NewConfigFactory(),
identn.NewConfigFactory(),

View File

@@ -18,6 +18,8 @@ import (
"github.com/SigNoz/signoz/pkg/modules/dashboard/impldashboard"
"github.com/SigNoz/signoz/pkg/modules/fields"
"github.com/SigNoz/signoz/pkg/modules/fields/implfields"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring/implinframonitoring"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer/implmetricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/quickfilter"
@@ -51,6 +53,7 @@ type Handlers struct {
SpanPercentile spanpercentile.Handler
Services services.Handler
MetricsExplorer metricsexplorer.Handler
InfraMonitoring inframonitoring.Handler
Global global.Handler
FlaggerHandler flagger.Handler
GatewayHandler gateway.Handler
@@ -87,6 +90,7 @@ func NewHandlers(
RawDataExport: implrawdataexport.NewHandler(modules.RawDataExport),
Services: implservices.NewHandler(modules.Services),
MetricsExplorer: implmetricsexplorer.NewHandler(modules.MetricsExplorer),
InfraMonitoring: implinframonitoring.NewHandler(modules.InfraMonitoring),
SpanPercentile: implspanpercentile.NewHandler(modules.SpanPercentile),
Global: signozglobal.NewHandler(global),
FlaggerHandler: flagger.NewHandler(flaggerService),

View File

@@ -13,6 +13,8 @@ import (
"github.com/SigNoz/signoz/pkg/modules/authdomain"
"github.com/SigNoz/signoz/pkg/modules/authdomain/implauthdomain"
"github.com/SigNoz/signoz/pkg/modules/dashboard"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring/implinframonitoring"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer/implmetricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/organization"
@@ -69,6 +71,7 @@ type Modules struct {
Services services.Module
SpanPercentile spanpercentile.Module
MetricsExplorer metricsexplorer.Module
InfraMonitoring inframonitoring.Module
Promote promote.Module
ServiceAccount serviceaccount.Module
RuleStateHistory rulestatehistory.Module
@@ -116,6 +119,7 @@ func NewModules(
SpanPercentile: implspanpercentile.NewModule(querier, providerSettings),
Services: implservices.NewModule(querier, telemetryStore),
MetricsExplorer: implmetricsexplorer.NewModule(telemetryStore, telemetryMetadataStore, cache, ruleStore, dashboard, providerSettings, config.MetricsExplorer),
InfraMonitoring: implinframonitoring.NewModule(telemetryStore, telemetryMetadataStore, querier, providerSettings, config.InfraMonitoring),
Promote: implpromote.NewModule(telemetryMetadataStore, telemetryStore),
ServiceAccount: implserviceaccount.NewModule(implserviceaccount.NewStore(sqlstore), authz, emailing, providerSettings),
RuleStateHistory: implrulestatehistory.NewModule(implrulestatehistory.NewStore(telemetryStore, telemetryMetadataStore, providerSettings.Logger)),

View File

@@ -21,6 +21,7 @@ import (
"github.com/SigNoz/signoz/pkg/modules/dashboard"
"github.com/SigNoz/signoz/pkg/modules/fields"
"github.com/SigNoz/signoz/pkg/modules/metricsexplorer"
"github.com/SigNoz/signoz/pkg/modules/inframonitoring"
"github.com/SigNoz/signoz/pkg/modules/organization"
"github.com/SigNoz/signoz/pkg/modules/preference"
"github.com/SigNoz/signoz/pkg/modules/promote"
@@ -59,6 +60,7 @@ func NewOpenAPI(ctx context.Context, instrumentation instrumentation.Instrumenta
struct{ dashboard.Module }{},
struct{ dashboard.Handler }{},
struct{ metricsexplorer.Handler }{},
struct{ inframonitoring.Handler }{},
struct{ gateway.Handler }{},
struct{ fields.Handler }{},
struct{ authz.Handler }{},

View File

@@ -271,6 +271,7 @@ func NewAPIServerProviderFactories(orgGetter organization.Getter, authz authz.Au
modules.Dashboard,
handlers.Dashboard,
handlers.MetricsExplorer,
handlers.InfraMonitoring,
handlers.GatewayHandler,
handlers.Fields,
handlers.AuthzHandler,

View File

@@ -0,0 +1,112 @@
package inframonitoringtypes
import (
"encoding/json"
"github.com/SigNoz/signoz/pkg/errors"
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
"github.com/SigNoz/signoz/pkg/valuer"
)
type HostStatus struct {
valuer.String
}
var (
HostStatusActive = HostStatus{valuer.NewString("active")}
HostStatusInactive = HostStatus{valuer.NewString("inactive")}
)
func (HostStatus) Enum() []any {
return []any{
HostStatusActive,
HostStatusInactive,
}
}
type HostsListRequest struct {
Start int64 `json:"start"`
End int64 `json:"end"`
Filter *qbtypes.Filter `json:"filter"`
FilterByStatus HostStatus `json:"filterByStatus"`
GroupBy []qbtypes.GroupByKey `json:"groupBy"`
OrderBy *qbtypes.OrderBy `json:"orderBy"`
Offset int `json:"offset"`
Limit int `json:"limit"`
}
// Validate ensures HostsListRequest contains acceptable values.
func (req *HostsListRequest) Validate() error {
if req == nil {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "request is nil")
}
if req.Start <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid start time %d: start must be greater than 0",
req.Start,
)
}
if req.End <= 0 {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid end time %d: end must be greater than 0",
req.End,
)
}
if req.Start >= req.End {
return errors.NewInvalidInputf(
errors.CodeInvalidInput,
"invalid time range: start (%d) must be less than end (%d)",
req.Start,
req.End,
)
}
if req.Limit < 1 || req.Limit > 5000 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "limit must be between 1 and 5000")
}
if req.Offset < 0 {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "offset cannot be negative")
}
if !req.FilterByStatus.IsZero() && req.FilterByStatus != HostStatusActive && req.FilterByStatus != HostStatusInactive {
return errors.NewInvalidInputf(errors.CodeInvalidInput, "invalid filter by status: %s", req.FilterByStatus)
}
return nil
}
// UnmarshalJSON validates input immediately after decoding.
func (req *HostsListRequest) UnmarshalJSON(data []byte) error {
type raw HostsListRequest
var decoded raw
if err := json.Unmarshal(data, &decoded); err != nil {
return err
}
*req = HostsListRequest(decoded)
return req.Validate()
}
type HostsListResponse struct {
Type string `json:"type"` // TODO(nikhilmantri0902): should this also be changed to valuer.string?
Records []HostRecord `json:"records"`
Total int `json:"total"`
SentAnyMetricsData bool `json:"sentAnyMetricsData"`
EndTimeBeforeRetention bool `json:"endTimeBeforeRetention"`
}
type HostRecord struct {
HostName string `json:"hostName"`
Status string `json:"status"`
CPU float64 `json:"cpu"`
Memory float64 `json:"memory"`
Wait float64 `json:"wait"`
Load15 float64 `json:"load15"`
DiskUsage float64 `json:"diskUsage"`
Meta map[string]interface{} `json:"meta"`
}

View File

@@ -0,0 +1,178 @@
package inframonitoringtypes
import (
"testing"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/valuer"
"github.com/stretchr/testify/require"
)
func TestHostsListRequest_Validate(t *testing.T) {
tests := []struct {
name string
req *HostsListRequest
wantErr bool
}{
{
name: "valid request",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "nil request",
req: nil,
wantErr: true,
},
{
name: "start time zero",
req: &HostsListRequest{
Start: 0,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time negative",
req: &HostsListRequest{
Start: -1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "end time zero",
req: &HostsListRequest{
Start: 1000,
End: 0,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time greater than end time",
req: &HostsListRequest{
Start: 2000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "start time equal to end time",
req: &HostsListRequest{
Start: 1000,
End: 1000,
Limit: 100,
Offset: 0,
},
wantErr: true,
},
{
name: "limit zero",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 0,
Offset: 0,
},
wantErr: true,
},
{
name: "limit negative",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: -10,
Offset: 0,
},
wantErr: true,
},
{
name: "limit exceeds max",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 5001,
Offset: 0,
},
wantErr: true,
},
{
name: "offset negative",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: -5,
},
wantErr: true,
},
{
name: "filter by status ACTIVE",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
FilterByStatus: HostStatusActive,
},
wantErr: false,
},
{
name: "filter by status INACTIVE",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
FilterByStatus: HostStatusInactive,
},
wantErr: false,
},
{
name: "filter by status empty (zero value)",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
},
wantErr: false,
},
{
name: "filter by status invalid value",
req: &HostsListRequest{
Start: 1000,
End: 2000,
Limit: 100,
Offset: 0,
FilterByStatus: HostStatus{valuer.NewString("UNKNOWN")},
},
wantErr: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
err := tt.req.Validate()
if tt.wantErr {
require.Error(t, err)
require.True(t, errors.Ast(err, errors.TypeInvalidInput), "expected error to be of type InvalidInput")
} else {
require.NoError(t, err)
}
})
}
}