Compare commits

...

1 Commits

Author SHA1 Message Date
Srikanth Chekuri
e1e9d516ac chore: add firing alert count and system/k8s metric existence status (#11730)
Some checks are pending
build-staging / prepare (push) Waiting to run
build-staging / js-build (push) Blocked by required conditions
build-staging / go-build (push) Blocked by required conditions
build-staging / staging (push) Blocked by required conditions
Release Drafter / update_release_draft (push) Waiting to run
2026-06-16 10:28:22 +00:00
5 changed files with 71 additions and 1 deletions

View File

@@ -0,0 +1,32 @@
package implinframonitoring
import (
"context"
"fmt"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/telemetrymetrics"
"github.com/SigNoz/signoz/pkg/valuer"
)
// Collect reports whether the metrics attributes metadata table contains at
// least one metric name starting with "system." and at least one starting
// with "k8s.". The organization ID argument is ignored.
//
// Collection is best-effort: if the ClickHouse query or scan fails, the error
// is logged at debug level and an empty map is returned with a nil error.
func (m *module) Collect(ctx context.Context, _ valuer.UUID) (map[string]any, error) {
	table := fmt.Sprintf("%s.%s", telemetrymetrics.DBName, telemetrymetrics.AttributesMetadataTableName)

	// Each scalar subquery counts at most one matching row (LIMIT 1), so the
	// scanned values are existence flags (0 or 1) rather than full counts.
	query := fmt.Sprintf(
		"SELECT (SELECT count() FROM (SELECT 1 FROM %s WHERE metric_name LIKE 'system.%%' LIMIT 1)), (SELECT count() FROM (SELECT 1 FROM %s WHERE metric_name LIKE 'k8s.%%' LIMIT 1))",
		table, table,
	)

	var systemHits, k8sHits uint64
	result := make(map[string]any)

	err := m.telemetryStore.ClickhouseDB().QueryRow(ctx, query).Scan(&systemHits, &k8sHits)
	if err != nil {
		m.logger.DebugContext(ctx, "failed to collect metrics namespace existence stats", errors.Attr(err))
		return result, nil
	}

	result["telemetry.metrics.system.exists"] = systemHits > 0
	result["telemetry.metrics.k8s.exists"] = k8sHits > 0
	return result, nil
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"net/http"
"github.com/SigNoz/signoz/pkg/statsreporter"
"github.com/SigNoz/signoz/pkg/types/inframonitoringtypes"
"github.com/SigNoz/signoz/pkg/valuer"
)
@@ -22,6 +23,7 @@ type Handler interface {
}
type Module interface {
statsreporter.StatsCollector
ListHosts(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableHosts) (*inframonitoringtypes.Hosts, error)
ListPods(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostablePods) (*inframonitoringtypes.Pods, error)
ListNodes(ctx context.Context, orgID valuer.UUID, req *inframonitoringtypes.PostableNodes) (*inframonitoringtypes.Nodes, error)

View File

@@ -744,6 +744,32 @@ func (m *Manager) TriggeredAlerts() []*ruletypes.NamedAlert {
return namedAlerts
}
// AlertStats is a point-in-time summary of alerting activity across the rules
// held by a Manager, as computed by (*Manager).AlertStats.
type AlertStats struct {
// FiringRules is the number of rules whose state is ruletypes.StateFiring.
FiringRules int64
// LastFiredAt is the latest FiredAt among the active alerts of all rules.
// It is the zero time when no active alert has a FiredAt after the zero time.
LastFiredAt time.Time
}
// AlertStats walks every rule held by the manager under the read lock and
// returns how many rules are currently in the firing state, together with the
// most recent FiredAt timestamp found among the rules' active alerts. The
// timestamp is the zero time when no active alert has fired.
func (m *Manager) AlertStats(ctx context.Context) AlertStats {
	m.mtx.RLock()
	defer m.mtx.RUnlock()

	// TODO(therealpandey): scope these stats per org; rules for all orgs are aggregated here.
	var (
		firing int64
		latest time.Time
	)
	for _, rule := range m.rules {
		if rule.State() == ruletypes.StateFiring {
			firing++
		}
		for _, active := range rule.ActiveAlerts() {
			if firedAt := active.FiredAt; firedAt.After(latest) {
				latest = firedAt
			}
		}
	}

	return AlertStats{
		FiringRules: firing,
		LastFiredAt: latest,
	}
}
// NotifyFunc sends notifications about a set of alerts generated by the given expression.
// It is invoked with a context, an orgID string, and zero or more alerts.
type NotifyFunc func(ctx context.Context, orgID string, alerts ...*ruletypes.Alert)

View File

@@ -100,7 +100,16 @@ func (provider *provider) Collect(ctx context.Context, orgID valuer.UUID) (map[s
return nil, err
}
return ruletypes.NewStatsFromRules(rules), nil
stats := ruletypes.NewStatsFromRules(rules)
alertStats := provider.manager.AlertStats(ctx)
stats["alert.firing.count"] = alertStats.FiringRules
if !alertStats.LastFiredAt.IsZero() {
stats["alert.last_fired.time"] = alertStats.LastFiredAt.UTC()
stats["alert.last_fired.time_unix"] = alertStats.LastFiredAt.Unix()
}
return stats, nil
}
func (provider *provider) ListRuleStates(ctx context.Context) (*ruletypes.GettableRules, error) {

View File

@@ -499,6 +499,7 @@ func New(
serviceAccount,
cloudIntegrationModule,
modules.LogsPipeline,
modules.InfraMonitoring,
querier,
}