Compare commits

...

12 Commits

Author SHA1 Message Date
nikhilmantri0902
8ed17c2d36 chore: resolve conflicts 2026-02-27 13:11:40 +05:30
nikhilmantri0902
397b0afb7e chore: added process and nfs metrics also for determining active hosts 2026-02-27 13:06:33 +05:30
nikhilmantri0902
321253fcd7 chore: updated metrics list using scrapers of docs 2026-02-25 16:41:50 +05:30
nikhilmantri0902
462ebdf213 chore: remaining metrics 2026-02-23 16:42:44 +05:30
nikhilmantri0902
919f0fa2ec chore: added metric_names across entire otel system metrics rack 2026-02-23 15:17:49 +05:30
Nikhil Mantri
bd01c5cfe7 Merge branch 'main' into feat/infra_status_logic_change 2026-02-23 14:39:48 +05:30
Srikanth Chekuri
69b8f75326 Merge branch 'main' into feat/infra_status_logic_change 2026-02-23 11:13:31 +05:30
Nikhil Mantri
7c88940512 Merge branch 'main' into feat/infra_status_logic_change 2026-02-16 12:32:59 +05:30
nikhilmantri0902
b25dd9387f chore: also added system.filesystem.usage 2026-02-12 12:36:14 +05:30
Srikanth Chekuri
5f10844b6f Merge branch 'main' into feat/infra_status_logic_change 2026-02-12 12:21:06 +05:30
nikhilmantri0902
f186a237e2 chore: added rows err check 2026-02-08 12:02:22 +05:30
nikhilmantri0902
876e223413 chore: added frontend and backend change 2026-02-08 11:50:57 +05:30
6 changed files with 208 additions and 107 deletions

View File

@@ -190,6 +190,11 @@
.ant-table-cell:nth-child(n + 3) {
padding-right: 24px;
}
/* Header cell content for the hosts list "Status" column: lays the label and
   its info-tooltip icon out in a row, vertically centred, with a 4px gap. */
.status-header {
display: flex;
align-items: center;
gap: 4px;
}
.memory-usage-header {
display: flex;
align-items: center;

View File

@@ -146,7 +146,14 @@ export const getHostsListColumns = (): ColumnType<HostRowData>[] => [
),
},
{
title: 'Status',
title: (
<div className="status-header">
Status
<Tooltip title="Sent system metrics in last 10 mins">
<InfoCircleOutlined />
</Tooltip>
</div>
),
dataIndex: 'active',
key: 'active',
width: 100,

View File

@@ -3541,6 +3541,45 @@ func (r *ClickHouseReader) GetCountOfThings(ctx context.Context, query string) (
return count, nil
}
// GetActiveHostsFromMetricMetadata returns the set of distinct, non-empty
// attr_string_value entries stored in the metrics metadata table for the
// attribute named hostNameAttr, restricted to rows whose metric_name is one of
// metricNames and whose last_reported_unix_milli is at or after sinceUnixMilli.
//
// Every value found is mapped to true. If running the query, scanning a row,
// or iterating the result set fails, a nil map and a wrapped internal error
// are returned.
func (r *ClickHouseReader) GetActiveHostsFromMetricMetadata(ctx context.Context, metricNames []string, hostNameAttr string, sinceUnixMilli int64) (map[string]bool, error) {
	// Only the database and table identifiers are interpolated; all
	// caller-supplied values are passed as named bind parameters.
	const queryTemplate = `SELECT DISTINCT attr_string_value
FROM %s.%s
WHERE metric_name IN @metricNames
AND attr_name = @attrName
AND last_reported_unix_milli >= @sinceUnixMilli`

	hosts := make(map[string]bool)
	stmt := fmt.Sprintf(queryTemplate, signozMetricDBName, constants.SIGNOZ_METADATA_TABLENAME)
	bindArgs := []interface{}{
		clickhouse.Named("metricNames", metricNames),
		clickhouse.Named("attrName", hostNameAttr),
		clickhouse.Named("sinceUnixMilli", sinceUnixMilli),
	}

	result, queryErr := r.db.Query(ctx, stmt, bindArgs...)
	if queryErr != nil {
		return nil, errorsV2.WrapInternalf(queryErr, errorsV2.CodeInternal, "error querying active hosts")
	}
	defer result.Close()

	for result.Next() {
		var name string
		if scanErr := result.Scan(&name); scanErr != nil {
			return nil, errorsV2.WrapInternalf(scanErr, errorsV2.CodeInternal, "error scanning active host row")
		}
		// Rows with an empty attribute value carry no host identity; skip them.
		if name == "" {
			continue
		}
		hosts[name] = true
	}

	// Next() returning false can also mean the iteration failed part-way.
	if iterErr := result.Err(); iterErr != nil {
		return nil, errorsV2.WrapInternalf(iterErr, errorsV2.CodeInternal, "error iterating active host rows")
	}

	return hosts, nil
}
func (r *ClickHouseReader) GetLatestReceivedMetric(
ctx context.Context, metricNames []string, labelValues map[string]string,
) (*model.MetricStatus, *model.ApiError) {

View File

@@ -10,55 +10,110 @@ import (
)
var dotMetricMap = map[string]string{
"system_filesystem_usage": "system.filesystem.usage",
"system_cpu_time": "system.cpu.time",
"system_memory_usage": "system.memory.usage",
"system_cpu_load_average_15m": "system.cpu.load_average.15m",
"host_name": "host.name",
"k8s_cluster_name": "k8s.cluster.name",
"k8s_node_name": "k8s.node.name",
"k8s_pod_memory_usage": "k8s.pod.memory.usage",
"k8s_pod_cpu_request_utilization": "k8s.pod.cpu_request_utilization",
"k8s_pod_memory_request_utilization": "k8s.pod.memory_request_utilization",
"k8s_pod_cpu_limit_utilization": "k8s.pod.cpu_limit_utilization",
"k8s_pod_memory_limit_utilization": "k8s.pod.memory_limit_utilization",
"k8s_container_restarts": "k8s.container.restarts",
"k8s_pod_phase": "k8s.pod.phase",
"k8s_node_allocatable_cpu": "k8s.node.allocatable_cpu",
"k8s_node_allocatable_memory": "k8s.node.allocatable_memory",
"k8s_node_memory_usage": "k8s.node.memory.usage",
"k8s_node_condition_ready": "k8s.node.condition_ready",
"k8s_daemonset_desired_scheduled_nodes": "k8s.daemonset.desired_scheduled_nodes",
"k8s_daemonset_current_scheduled_nodes": "k8s.daemonset.current_scheduled_nodes",
"k8s_deployment_desired": "k8s.deployment.desired",
"k8s_deployment_available": "k8s.deployment.available",
"k8s_job_desired_successful_pods": "k8s.job.desired_successful_pods",
"k8s_job_active_pods": "k8s.job.active_pods",
"k8s_job_failed_pods": "k8s.job.failed_pods",
"k8s_job_successful_pods": "k8s.job.successful_pods",
"k8s_statefulset_desired_pods": "k8s.statefulset.desired_pods",
"k8s_statefulset_current_pods": "k8s.statefulset.current_pods",
"k8s_namespace_name": "k8s.namespace.name",
"k8s_deployment_name": "k8s.deployment.name",
"k8s_cronjob_name": "k8s.cronjob.name",
"k8s_job_name": "k8s.job.name",
"k8s_daemonset_name": "k8s.daemonset.name",
"os_type": "os.type",
"process_cgroup": "process.cgroup",
"process_pid": "process.pid",
"process_parent_pid": "process.parent_pid",
"process_owner": "process.owner",
"process_executable_path": "process.executable.path",
"process_executable_name": "process.executable.name",
"process_command_line": "process.command_line",
"process_command": "process.command",
"process_memory_usage": "process.memory.usage",
"k8s_persistentvolumeclaim_name": "k8s.persistentvolumeclaim.name",
"k8s_volume_available": "k8s.volume.available",
"k8s_volume_capacity": "k8s.volume.capacity",
"k8s_volume_inodes": "k8s.volume.inodes",
"k8s_volume_inodes_free": "k8s.volume.inodes.free",
// add additional mappings as needed
"system_uptime": "system.uptime",
"system_cpu_physical_count": "system.cpu.physical.count",
"system_cpu_logical_count": "system.cpu.logical.count",
"system_cpu_time": "system.cpu.time",
"system_cpu_frequency": "system.cpu.frequency",
"system_cpu_utilization": "system.cpu.utilization",
"system_cpu_load_average_15m": "system.cpu.load_average.15m",
"system_memory_usage": "system.memory.usage",
"system_memory_limit": "system.memory.limit",
"system_memory_utilization": "system.memory.utilization",
"system_memory_linux_available": "system.memory.linux.available",
"system_memory_linux_shared": "system.memory.linux.shared",
"system_memory_linux_slab_usage": "system.memory.linux.slab.usage",
"system_paging_usage": "system.paging.usage",
"system_paging_utilization": "system.paging.utilization",
"system_paging_faults": "system.paging.faults",
"system_paging_operations": "system.paging.operations",
"system_disk_io": "system.disk.io",
"system_disk_operations": "system.disk.operations",
"system_disk_io_time": "system.disk.io_time",
"system_disk_operation_time": "system.disk.operation_time",
"system_disk_merged": "system.disk.merged",
"system_disk_limit": "system.disk.limit",
"system_filesystem_usage": "system.filesystem.usage",
"system_filesystem_utilization": "system.filesystem.utilization",
"system_filesystem_limit": "system.filesystem.limit",
"system_network_errors": "system.network.errors",
"system_network_io": "system.network.io",
"system_network_connections": "system.network.connections",
"system_network_dropped": "system.network.dropped",
"system_network_packets": "system.network.packets",
"system_processes_count": "system.processes.count",
"system_processes_created": "system.processes.created",
"system_disk_pending_operations": "system.disk.pending_operations",
"system_disk_weighted_io_time": "system.disk.weighted_io_time",
"system_filesystem_inodes_usage": "system.filesystem.inodes.usage",
"system_network_conntrack_count": "system.network.conntrack.count",
"system_network_conntrack_max": "system.network.conntrack.max",
"system_cpu_load_average_1m": "system.cpu.load_average.1m",
"system_cpu_load_average_5m": "system.cpu.load_average.5m",
"host_name": "host.name",
"k8s_cluster_name": "k8s.cluster.name",
"k8s_node_name": "k8s.node.name",
"k8s_pod_memory_usage": "k8s.pod.memory.usage",
"k8s_pod_cpu_request_utilization": "k8s.pod.cpu_request_utilization",
"k8s_pod_memory_request_utilization": "k8s.pod.memory_request_utilization",
"k8s_pod_cpu_limit_utilization": "k8s.pod.cpu_limit_utilization",
"k8s_pod_memory_limit_utilization": "k8s.pod.memory_limit_utilization",
"k8s_container_restarts": "k8s.container.restarts",
"k8s_pod_phase": "k8s.pod.phase",
"k8s_node_allocatable_cpu": "k8s.node.allocatable_cpu",
"k8s_node_allocatable_memory": "k8s.node.allocatable_memory",
"k8s_node_memory_usage": "k8s.node.memory.usage",
"k8s_node_condition_ready": "k8s.node.condition_ready",
"k8s_daemonset_desired_scheduled_nodes": "k8s.daemonset.desired_scheduled_nodes",
"k8s_daemonset_current_scheduled_nodes": "k8s.daemonset.current_scheduled_nodes",
"k8s_deployment_desired": "k8s.deployment.desired",
"k8s_deployment_available": "k8s.deployment.available",
"k8s_job_desired_successful_pods": "k8s.job.desired_successful_pods",
"k8s_job_active_pods": "k8s.job.active_pods",
"k8s_job_failed_pods": "k8s.job.failed_pods",
"k8s_job_successful_pods": "k8s.job.successful_pods",
"k8s_statefulset_desired_pods": "k8s.statefulset.desired_pods",
"k8s_statefulset_current_pods": "k8s.statefulset.current_pods",
"k8s_namespace_name": "k8s.namespace.name",
"k8s_deployment_name": "k8s.deployment.name",
"k8s_cronjob_name": "k8s.cronjob.name",
"k8s_job_name": "k8s.job.name",
"k8s_daemonset_name": "k8s.daemonset.name",
"os_type": "os.type",
"process_cgroup": "process.cgroup",
"process_pid": "process.pid",
"process_parent_pid": "process.parent_pid",
"process_owner": "process.owner",
"process_executable_path": "process.executable.path",
"process_executable_name": "process.executable.name",
"process_command_line": "process.command_line",
"process_command": "process.command",
"process_memory_usage": "process.memory.usage",
"process_memory_virtual": "process.memory.virtual",
"process_cpu_time": "process.cpu.time",
"process_disk_io": "process.disk.io",
"nfs_client_net_count": "nfs.client.net.count",
"nfs_client_net_tcp_connection_accepted": "nfs.client.net.tcp.connection.accepted",
"nfs_client_operation_count": "nfs.client.operation.count",
"nfs_client_procedure_count": "nfs.client.procedure.count",
"nfs_client_rpc_authrefresh_count": "nfs.client.rpc.authrefresh.count",
"nfs_client_rpc_count": "nfs.client.rpc.count",
"nfs_client_rpc_retransmit_count": "nfs.client.rpc.retransmit.count",
"nfs_server_fh_stale_count": "nfs.server.fh.stale.count",
"nfs_server_io": "nfs.server.io",
"nfs_server_net_count": "nfs.server.net.count",
"nfs_server_net_tcp_connection_accepted": "nfs.server.net.tcp.connection.accepted",
"nfs_server_operation_count": "nfs.server.operation.count",
"nfs_server_procedure_count": "nfs.server.procedure.count",
"nfs_server_repcache_requests": "nfs.server.repcache.requests",
"nfs_server_rpc_count": "nfs.server.rpc.count",
"nfs_server_thread_count": "nfs.server.thread.count",
"k8s_persistentvolumeclaim_name": "k8s.persistentvolumeclaim.name",
"k8s_volume_available": "k8s.volume.available",
"k8s_volume_capacity": "k8s.volume.capacity",
"k8s_volume_inodes": "k8s.volume.inodes",
"k8s_volume_inodes_free": "k8s.volume.inodes.free",
"k8s_pod_uid": "k8s.pod.uid",
"k8s_pod_name": "k8s.pod.name",

View File

@@ -73,6 +73,53 @@ var (
"load15": GetDotMetrics("system_cpu_load_average_15m"),
"wait": GetDotMetrics("system_cpu_time"),
}
// uniqueMetricNamesForHosts is the list of metric names handed to
// GetActiveHostsFromMetricMetadata by getActiveHosts: a host is reported as
// active when any of these metrics was last reported for it inside the
// lookback window. Each entry is resolved through GetDotMetrics (presumably
// the underscore-key to dotted-name lookup; confirm against its definition).
uniqueMetricNamesForHosts = []string{
// system.* metrics
GetDotMetrics("system_uptime"),
GetDotMetrics("system_cpu_time"),
GetDotMetrics("system_cpu_load_average_1m"),
GetDotMetrics("system_cpu_load_average_5m"),
GetDotMetrics("system_cpu_load_average_15m"),
GetDotMetrics("system_memory_usage"),
GetDotMetrics("system_paging_usage"),
GetDotMetrics("system_paging_faults"),
GetDotMetrics("system_paging_operations"),
GetDotMetrics("system_disk_io"),
GetDotMetrics("system_disk_operations"),
GetDotMetrics("system_disk_io_time"),
GetDotMetrics("system_disk_operation_time"),
GetDotMetrics("system_disk_merged"),
GetDotMetrics("system_disk_pending_operations"),
GetDotMetrics("system_disk_weighted_io_time"),
GetDotMetrics("system_filesystem_usage"),
GetDotMetrics("system_filesystem_inodes_usage"),
GetDotMetrics("system_network_io"),
GetDotMetrics("system_network_errors"),
GetDotMetrics("system_network_connections"),
GetDotMetrics("system_network_dropped"),
GetDotMetrics("system_network_packets"),
GetDotMetrics("system_processes_count"),
GetDotMetrics("system_processes_created"),
// process.* metrics
GetDotMetrics("process_cpu_time"),
GetDotMetrics("process_disk_io"),
GetDotMetrics("process_memory_usage"),
GetDotMetrics("process_memory_virtual"),
// nfs.client.* metrics
GetDotMetrics("nfs_client_net_count"),
GetDotMetrics("nfs_client_net_tcp_connection_accepted"),
GetDotMetrics("nfs_client_operation_count"),
GetDotMetrics("nfs_client_procedure_count"),
GetDotMetrics("nfs_client_rpc_authrefresh_count"),
GetDotMetrics("nfs_client_rpc_count"),
GetDotMetrics("nfs_client_rpc_retransmit_count"),
// nfs.server.* metrics
GetDotMetrics("nfs_server_fh_stale_count"),
GetDotMetrics("nfs_server_io"),
GetDotMetrics("nfs_server_net_count"),
GetDotMetrics("nfs_server_net_tcp_connection_accepted"),
GetDotMetrics("nfs_server_operation_count"),
GetDotMetrics("nfs_server_procedure_count"),
GetDotMetrics("nfs_server_repcache_requests"),
GetDotMetrics("nfs_server_rpc_count"),
GetDotMetrics("nfs_server_thread_count"),
}
)
func NewHostsRepo(reader interfaces.Reader, querierV2 interfaces.Querier) *HostsRepo {
@@ -131,62 +178,9 @@ func (h *HostsRepo) GetHostAttributeValues(ctx context.Context, req v3.FilterAtt
return &v3.FilterAttributeValueResponse{StringAttributeValues: hostNames}, nil
}
func (h *HostsRepo) getActiveHosts(ctx context.Context, orgID valuer.UUID, req model.HostListRequest) (map[string]bool, error) {
activeStatus := map[string]bool{}
step := common.MinAllowedStepInterval(req.Start, req.End)
hasHostName := false
for _, key := range req.GroupBy {
if key.Key == hostNameAttrKey {
hasHostName = true
}
}
if !hasHostName {
req.GroupBy = append(req.GroupBy, v3.AttributeKey{Key: hostNameAttrKey})
}
params := v3.QueryRangeParamsV3{
Start: time.Now().Add(-time.Minute * 10).UTC().UnixMilli(),
End: time.Now().UTC().UnixMilli(),
Step: step,
CompositeQuery: &v3.CompositeQuery{
BuilderQueries: map[string]*v3.BuilderQuery{
"A": {
QueryName: "A",
StepInterval: step,
DataSource: v3.DataSourceMetrics,
AggregateAttribute: v3.AttributeKey{
Key: metricToUseForHostAttributes,
DataType: v3.AttributeKeyDataTypeFloat64,
},
Temporality: v3.Unspecified,
Filters: req.Filters,
GroupBy: req.GroupBy,
Expression: "A",
TimeAggregation: v3.TimeAggregationAvg,
SpaceAggregation: v3.SpaceAggregationAvg,
Disabled: false,
},
},
QueryType: v3.QueryTypeBuilder,
PanelType: v3.PanelTypeGraph,
},
}
queryResponse, _, err := h.querierV2.QueryRange(ctx, orgID, &params)
if err != nil {
return nil, err
}
for _, result := range queryResponse {
for _, series := range result.Series {
name := series.Labels[hostNameAttrKey]
activeStatus[name] = true
}
}
return activeStatus, nil
// getActiveHosts asks the reader which hosts, identified by the
// hostNameAttrKey attribute, have had any metric in uniqueMetricNamesForHosts
// reported within the last 10 minutes. The reader's result map and error are
// returned unchanged.
func (h *HostsRepo) getActiveHosts(ctx context.Context) (map[string]bool, error) {
	// Lookback window used to decide whether a host counts as active.
	const activeWindow = 10 * time.Minute

	since := time.Now().Add(-activeWindow).UTC().UnixMilli()
	active, err := h.reader.GetActiveHostsFromMetricMetadata(ctx, uniqueMetricNamesForHosts, hostNameAttrKey, since)
	return active, err
}
func (h *HostsRepo) getMetadataAttributes(ctx context.Context, req model.HostListRequest) (map[string]map[string]string, error) {
@@ -450,7 +444,7 @@ func (h *HostsRepo) GetHostList(ctx context.Context, orgID valuer.UUID, req mode
return resp, err
}
activeHosts, err := h.getActiveHosts(ctx, orgID, req)
activeHosts, err := h.getActiveHosts(ctx)
if err != nil {
return resp, err
}

View File

@@ -99,6 +99,7 @@ type Reader interface {
SubscribeToQueryProgress(queryId string) (<-chan model.QueryProgress, func(), *model.ApiError)
GetCountOfThings(ctx context.Context, query string) (uint64, error)
GetActiveHostsFromMetricMetadata(ctx context.Context, metricNames []string, hostNameAttr string, sinceUnixMilli int64) (map[string]bool, error)
GetMetricsExistenceAndEarliestTime(ctx context.Context, metricNames []string) (uint64, uint64, error)