Compare commits

..

1 Commits

Author SHA1 Message Date
vikrantgupta25
501ad64b9e test(wal): log the leaky queries 2026-04-10 00:00:20 +05:30
11 changed files with 83 additions and 85 deletions

View File

@@ -8,7 +8,6 @@ import (
"github.com/SigNoz/signoz/cmd"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/auditor"
"github.com/SigNoz/signoz/pkg/authn"
"github.com/SigNoz/signoz/pkg/authz"
"github.com/SigNoz/signoz/pkg/authz/openfgaauthz"
@@ -94,9 +93,6 @@ func runServer(ctx context.Context, config signoz.Config, logger *slog.Logger) e
func(_ licensing.Licensing) factory.ProviderFactory[gateway.Gateway, gateway.Config] {
return noopgateway.NewProviderFactory()
},
func(_ licensing.Licensing) factory.NamedMap[factory.ProviderFactory[auditor.Auditor, auditor.Config]] {
return signoz.NewAuditorProviderFactories()
},
func(ps factory.ProviderSettings, q querier.Querier, a analytics.Analytics) querier.Handler {
return querier.NewHandler(ps, q, a)
},

View File

@@ -8,7 +8,6 @@ import (
"github.com/spf13/cobra"
"github.com/SigNoz/signoz/cmd"
"github.com/SigNoz/signoz/ee/auditor/otlphttpauditor"
"github.com/SigNoz/signoz/ee/authn/callbackauthn/oidccallbackauthn"
"github.com/SigNoz/signoz/ee/authn/callbackauthn/samlcallbackauthn"
"github.com/SigNoz/signoz/ee/authz/openfgaauthz"
@@ -25,7 +24,6 @@ import (
enterprisezeus "github.com/SigNoz/signoz/ee/zeus"
"github.com/SigNoz/signoz/ee/zeus/httpzeus"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/auditor"
"github.com/SigNoz/signoz/pkg/authn"
"github.com/SigNoz/signoz/pkg/authz"
"github.com/SigNoz/signoz/pkg/errors"
@@ -135,13 +133,6 @@ func runServer(ctx context.Context, config signoz.Config, logger *slog.Logger) e
func(licensing licensing.Licensing) factory.ProviderFactory[gateway.Gateway, gateway.Config] {
return httpgateway.NewProviderFactory(licensing)
},
func(licensing licensing.Licensing) factory.NamedMap[factory.ProviderFactory[auditor.Auditor, auditor.Config]] {
factories := signoz.NewAuditorProviderFactories()
if err := factories.Add(otlphttpauditor.NewFactory(licensing, version.Info)); err != nil {
panic(err)
}
return factories
},
func(ps factory.ProviderSettings, q querier.Querier, a analytics.Analytics) querier.Handler {
communityHandler := querier.NewHandler(ps, q, a)
return eequerier.NewHandler(ps, q, communityHandler)

View File

@@ -364,34 +364,3 @@ serviceaccount:
analytics:
# toggle service account analytics
enabled: true
##################### Auditor #####################
auditor:
# Specifies the auditor provider to use.
# noop: discards all audit events (community default).
# otlphttp: exports audit events via OTLP HTTP (enterprise).
provider: noop
# The async channel capacity for audit events. Events are dropped when full (fail-open).
buffer_size: 1000
# The maximum number of events per export batch.
batch_size: 100
# The maximum time between export flushes.
flush_interval: 1s
otlphttp:
# The target scheme://host:port/path of the OTLP HTTP endpoint.
endpoint: http://localhost:4318/v1/logs
# Whether to use HTTP instead of HTTPS.
insecure: false
# The maximum duration for an export attempt.
timeout: 10s
# Additional HTTP headers sent with every export request.
headers: {}
retry:
# Whether to retry on transient failures.
enabled: true
# The initial wait time before the first retry.
initial_interval: 5s
# The upper bound on backoff interval.
max_interval: 30s
# The total maximum time spent retrying.
max_elapsed_time: 60s

View File

@@ -227,7 +227,7 @@ func (s *Server) createPublicServer(apiHandler *api.APIHandler, web web.Web) (*h
s.config.APIServer.Timeout.Default,
s.config.APIServer.Timeout.Max,
).Wrap)
r.Use(middleware.NewAudit(s.signoz.Instrumentation.Logger(), s.config.APIServer.Logging.ExcludedRoutes, s.signoz.Auditor).Wrap)
r.Use(middleware.NewAudit(s.signoz.Instrumentation.Logger(), s.config.APIServer.Logging.ExcludedRoutes, nil).Wrap)
r.Use(middleware.NewComment().Wrap)
apiHandler.RegisterRoutes(r, am)

View File

@@ -677,18 +677,6 @@ function NewWidget({
queryType: currentQuery.queryType,
isNewPanel,
dataSource: currentQuery?.builder?.queryData?.[0]?.dataSource,
...(currentQuery.queryType === EQueryType.CLICKHOUSE && {
clickhouseQueryCount: currentQuery.clickhouse_sql.length,
clickhouseQueries: currentQuery.clickhouse_sql.map((q) => ({
name: q.name,
query: (q.query ?? '')
.replace(/--[^\n]*/g, '') // strip line comments
.replace(/\/\*[\s\S]*?\*\//g, '') // strip block comments
.replace(/'(?:[^'\\]|\\.|'')*'/g, "'?'") // replace single-quoted strings (handles \' and '' escapes)
.replace(/\b\d+(?:\.\d+)?(?:[eE][+-]?\d+)?\b/g, '?'), // replace numeric literals (int, float, scientific)
disabled: q.disabled,
})),
}),
});
setSaveModal(true);
// eslint-disable-next-line react-hooks/exhaustive-deps

View File

@@ -63,7 +63,6 @@ type RetryConfig struct {
func newConfig() factory.Config {
return Config{
Provider: "noop",
BufferSize: 1000,
BatchSize: 100,
FlushInterval: time.Second,

View File

@@ -208,7 +208,7 @@ func (s *Server) createPublicServer(api *APIHandler, web web.Web) (*http.Server,
s.config.APIServer.Timeout.Default,
s.config.APIServer.Timeout.Max,
).Wrap)
r.Use(middleware.NewAudit(s.signoz.Instrumentation.Logger(), s.config.APIServer.Logging.ExcludedRoutes, s.signoz.Auditor).Wrap)
r.Use(middleware.NewAudit(s.signoz.Instrumentation.Logger(), s.config.APIServer.Logging.ExcludedRoutes, nil).Wrap)
r.Use(middleware.NewComment().Wrap)
am := middleware.NewAuthZ(s.signoz.Instrumentation.Logger(), s.signoz.Modules.OrgGetter, s.signoz.Authz)

View File

@@ -11,7 +11,6 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/apiserver"
"github.com/SigNoz/signoz/pkg/auditor"
"github.com/SigNoz/signoz/pkg/cache"
"github.com/SigNoz/signoz/pkg/config"
"github.com/SigNoz/signoz/pkg/emailing"
@@ -124,9 +123,6 @@ type Config struct {
// ServiceAccount config
ServiceAccount serviceaccount.Config `mapstructure:"serviceaccount"`
// Auditor config
Auditor auditor.Config `mapstructure:"auditor"`
}
func NewConfig(ctx context.Context, logger *slog.Logger, resolverConfig config.ResolverConfig) (Config, error) {
@@ -157,7 +153,6 @@ func NewConfig(ctx context.Context, logger *slog.Logger, resolverConfig config.R
user.NewConfigFactory(),
identn.NewConfigFactory(),
serviceaccount.NewConfigFactory(),
auditor.NewConfigFactory(),
}
conf, err := config.New(ctx, resolverConfig, configFactories)

View File

@@ -3,8 +3,6 @@ package signoz
import (
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/auditor"
"github.com/SigNoz/signoz/pkg/auditor/noopauditor"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/rulebasednotification"
"github.com/SigNoz/signoz/pkg/alertmanager/signozalertmanager"
"github.com/SigNoz/signoz/pkg/analytics"
@@ -314,12 +312,6 @@ func NewGlobalProviderFactories(identNConfig identn.Config) factory.NamedMap[fac
)
}
func NewAuditorProviderFactories() factory.NamedMap[factory.ProviderFactory[auditor.Auditor, auditor.Config]] {
return factory.MustNewNamedMap(
noopauditor.NewFactory(),
)
}
func NewFlaggerProviderFactories(registry featuretypes.Registry) factory.NamedMap[factory.ProviderFactory[flagger.FlaggerProvider, flagger.Config]] {
return factory.MustNewNamedMap(
configflagger.NewFactory(registry),

View File

@@ -6,7 +6,6 @@ import (
"github.com/SigNoz/signoz/pkg/alertmanager"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager"
"github.com/SigNoz/signoz/pkg/auditor"
"github.com/SigNoz/signoz/pkg/alertmanager/nfmanager/nfroutingstore/sqlroutingstore"
"github.com/SigNoz/signoz/pkg/analytics"
"github.com/SigNoz/signoz/pkg/apiserver"
@@ -76,7 +75,6 @@ type SigNoz struct {
QueryParser queryparser.QueryParser
Flagger flagger.Flagger
Gateway gateway.Gateway
Auditor auditor.Auditor
}
func New(
@@ -96,7 +94,6 @@ func New(
authzCallback func(context.Context, sqlstore.SQLStore, licensing.Licensing, dashboard.Module) (factory.ProviderFactory[authz.AuthZ, authz.Config], error),
dashboardModuleCallback func(sqlstore.SQLStore, factory.ProviderSettings, analytics.Analytics, organization.Getter, queryparser.QueryParser, querier.Querier, licensing.Licensing) dashboard.Module,
gatewayProviderFactory func(licensing.Licensing) factory.ProviderFactory[gateway.Gateway, gateway.Config],
auditorProviderFactories func(licensing.Licensing) factory.NamedMap[factory.ProviderFactory[auditor.Auditor, auditor.Config]],
querierHandlerCallback func(factory.ProviderSettings, querier.Querier, analytics.Analytics) querier.Handler,
) (*SigNoz, error) {
// Initialize instrumentation
@@ -374,12 +371,6 @@ func New(
return nil, err
}
// Initialize auditor from the variant-specific provider factories
auditor, err := factory.NewProviderFromNamedMap(ctx, providerSettings, config.Auditor, auditorProviderFactories(licensing), config.Auditor.Provider)
if err != nil {
return nil, err
}
// Initialize authns
store := sqlauthnstore.NewStore(sqlstore)
authNs, err := authNsCallback(ctx, providerSettings, store, licensing)
@@ -479,7 +470,6 @@ func New(
factory.NewNamedService(factory.MustNewName("tokenizer"), tokenizer),
factory.NewNamedService(factory.MustNewName("authz"), authz),
factory.NewNamedService(factory.MustNewName("user"), userService, factory.MustNewName("authz")),
factory.NewNamedService(factory.MustNewName("auditor"), auditor),
)
if err != nil {
return nil, err
@@ -526,6 +516,5 @@ func New(
QueryParser: queryParser,
Flagger: flagger,
Gateway: gateway,
Auditor: auditor,
}, nil
}

View File

@@ -6,6 +6,8 @@ import (
"fmt"
"log/slog"
"net/url"
"os"
"time"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/factory"
@@ -23,6 +25,7 @@ type provider struct {
bundb *sqlstore.BunDB
dialect *dialect
formatter sqlstore.SQLFormatter
done chan struct{}
}
func NewFactory(hookFactories ...factory.ProviderFactory[sqlstore.SQLStoreHook, sqlstore.Config]) factory.ProviderFactory[sqlstore.SQLStore, sqlstore.Config] {
@@ -59,13 +62,19 @@ func New(ctx context.Context, providerSettings factory.ProviderSettings, config
sqliteDialect := sqlitedialect.New()
bunDB := sqlstore.NewBunDB(settings, sqldb, sqliteDialect, hooks)
return &provider{
done := make(chan struct{})
p := &provider{
settings: settings,
sqldb: sqldb,
bundb: bunDB,
dialect: new(dialect),
formatter: newFormatter(bunDB.Dialect()),
}, nil
done: done,
}
go p.walDiagnosticLoop(config.Sqlite.Path)
return p, nil
}
func (provider *provider) BunDB() *bun.DB {
@@ -109,3 +118,73 @@ func (provider *provider) WrapAlreadyExistsErrf(err error, code errors.Code, for
return err
}
// walDiagnosticLoop periodically logs pool stats, WAL file size, and busy prepared statements
// to help diagnose WAL checkpoint failures caused by permanent read locks.
func (provider *provider) walDiagnosticLoop(dbPath string) {
ticker := time.NewTicker(60 * time.Second)
defer ticker.Stop()
logger := provider.settings.Logger()
walPath := dbPath + "-wal"
for {
select {
case <-provider.done:
return
case <-ticker.C:
// 1. Log pool stats (no SQL needed)
stats := provider.sqldb.Stats()
logger.Info("sqlite_pool_stats",
slog.Int("max_open", stats.MaxOpenConnections),
slog.Int("open", stats.OpenConnections),
slog.Int("in_use", stats.InUse),
slog.Int("idle", stats.Idle),
slog.Int64("wait_count", stats.WaitCount),
slog.String("wait_duration", stats.WaitDuration.String()),
slog.Int64("max_idle_closed", stats.MaxIdleClosed),
slog.Int64("max_idle_time_closed", stats.MaxIdleTimeClosed),
slog.Int64("max_lifetime_closed", stats.MaxLifetimeClosed),
)
// 2. Log WAL file size (no SQL needed)
if info, err := os.Stat(walPath); err == nil {
logger.Info("sqlite_wal_size",
slog.Int64("bytes", info.Size()),
slog.String("path", walPath),
)
}
// 3. Check for busy prepared statements on a single pool connection
provider.checkBusyStatements(logger)
}
}
}
func (provider *provider) checkBusyStatements(logger *slog.Logger) {
conn, err := provider.sqldb.Conn(context.Background())
if err != nil {
logger.Warn("sqlite_diag_conn_error", slog.String("error", err.Error()))
return
}
defer conn.Close()
rows, err := conn.QueryContext(context.Background(), "SELECT sql FROM sqlite_stmt WHERE busy")
if err != nil {
logger.Warn("sqlite_diag_query_error", slog.String("error", err.Error()))
return
}
defer rows.Close()
for rows.Next() {
var stmtSQL string
if err := rows.Scan(&stmtSQL); err != nil {
logger.Warn("sqlite_diag_scan_error", slog.String("error", err.Error()))
continue
}
logger.Warn("leaked_busy_statement", slog.String("sql", stmtSQL))
}
if err := rows.Err(); err != nil {
logger.Warn("sqlite_diag_rows_error", slog.String("error", err.Error()))
}
}