Compare commits

..

3 Commits

Author SHA1 Message Date
grandwizard28
79fa501ac1 fix(middleware): replace fmt.Errorf with errors.New in recovery test 2026-03-22 15:18:49 +05:30
grandwizard28
c93d6c4b3b style(errors): keep WithStacktrace call on same line in test 2026-03-22 14:43:55 +05:30
grandwizard28
bab50321fd feat(middleware): add panic recovery middleware with TypeFatal error type
Add a global HTTP recovery middleware that catches panics, logs them
with OTel exception semantic conventions via errors.Attr, and returns
a safe user-facing error response. Introduce TypeFatal/CodeFatal for
unrecoverable failures and WithStacktrace to attach pre-formatted
stack traces to errors. Remove redundant per-handler panic recovery
blocks in querier APIs.
2026-03-22 14:37:39 +05:30
12 changed files with 246 additions and 68 deletions

View File

@@ -5,9 +5,7 @@ import (
"context"
"encoding/json"
"io"
"log/slog"
"net/http"
"runtime/debug"
anomalyV2 "github.com/SigNoz/signoz/ee/anomaly"
"github.com/SigNoz/signoz/pkg/errors"
@@ -55,26 +53,6 @@ func (h *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
return
}
defer func() {
if r := recover(); r != nil {
stackTrace := string(debug.Stack())
queryJSON, _ := json.Marshal(queryRangeRequest)
h.set.Logger.ErrorContext(ctx, "panic in QueryRange",
slog.Any("error", r),
slog.Any("user", claims.UserID),
slog.String("payload", string(queryJSON)),
slog.String("stacktrace", stackTrace),
)
render.Error(rw, errors.NewInternalf(
errors.CodeInternal,
"Something went wrong on our end. It's not you, it's us. Our team is notified about it. Reach out to support if issue persists.",
))
}
}()
if err := queryRangeRequest.Validate(); err != nil {
render.Error(rw, err)
return

View File

@@ -211,6 +211,7 @@ func (s *Server) createPublicServer(apiHandler *api.APIHandler, web web.Web) (*h
r := baseapp.NewRouter()
am := middleware.NewAuthZ(s.signoz.Instrumentation.Logger(), s.signoz.Modules.OrgGetter, s.signoz.Authz)
r.Use(middleware.NewRecovery(s.signoz.Instrumentation.Logger()).Wrap)
r.Use(otelmux.Middleware(
"apiserver",
otelmux.WithMeterProvider(s.signoz.Instrumentation.MeterProvider()),
@@ -219,7 +220,6 @@ func (s *Server) createPublicServer(apiHandler *api.APIHandler, web web.Web) (*h
otelmux.WithFilter(func(r *http.Request) bool {
return !slices.Contains([]string{"/api/v1/health"}, r.URL.Path)
}),
otelmux.WithPublicEndpoint(),
))
r.Use(middleware.NewIdentN(s.signoz.IdentNResolver, s.signoz.Sharder, s.signoz.Instrumentation.Logger()).Wrap)
r.Use(middleware.NewTimeout(s.signoz.Instrumentation.Logger(),

View File

@@ -16,6 +16,7 @@ var (
CodeCanceled = Code{"canceled"}
CodeTimeout = Code{"timeout"}
CodeUnknown = Code{"unknown"}
CodeFatal = Code{"fatal"}
CodeLicenseUnavailable = Code{"license_unavailable"}
)

View File

@@ -22,14 +22,31 @@ type base struct {
// a denotes any additional error messages (if present).
a []string
// s contains the stacktrace captured at error creation time.
s stacktrace
s fmt.Stringer
}
// Stacktrace returns the stacktrace captured at error creation time, formatted as a string.
func (b *base) Stacktrace() string {
if b.s == nil {
return ""
}
return b.s.String()
}
// WithStacktrace replaces the auto-captured stacktrace with a pre-formatted string
// and returns a new base error.
func (b *base) WithStacktrace(s string) *base {
return &base{
t: b.t,
c: b.c,
m: b.m,
e: b.e,
u: b.u,
a: b.a,
s: rawStacktrace(s),
}
}
// base implements Error interface.
func (b *base) Error() string {
if b.e != nil {
@@ -89,7 +106,7 @@ func Wrap(cause error, t typ, code Code, message string) *base {
// WithAdditionalf adds an additional error message to the existing error.
func WithAdditionalf(cause error, format string, args ...any) *base {
t, c, m, e, u, a := Unwrapb(cause)
var s stacktrace
var s fmt.Stringer
if original, ok := cause.(*base); ok {
s = original.s
}

View File

@@ -58,3 +58,15 @@ func TestAttr(t *testing.T) {
assert.Equal(t, "exception", attr.Key)
assert.Equal(t, err, attr.Value.Any())
}
func TestWithStacktrace(t *testing.T) {
err := New(TypeInternal, MustNewCode("test_code"), "panic").WithStacktrace("custom stack trace")
assert.Equal(t, "custom stack trace", err.Stacktrace())
assert.Equal(t, "panic", err.Error())
typ, code, message, _, _, _ := Unwrapb(err)
assert.Equal(t, TypeInternal, typ)
assert.Equal(t, "test_code", code.String())
assert.Equal(t, "panic", message)
}

View File

@@ -36,3 +36,10 @@ func (s stacktrace) String() string {
}
return buf.String()
}
// rawStacktrace holds a pre-formatted stacktrace string.
type rawStacktrace string
func (r rawStacktrace) String() string {
return string(r)
}

View File

@@ -12,6 +12,7 @@ var (
TypeCanceled = typ{"canceled"}
TypeTimeout = typ{"timeout"}
TypeUnexpected = typ{"unexpected"} // Generic mismatch of expectations
TypeFatal = typ{"fatal"} // Unrecoverable failure (e.g. panic)
TypeLicenseUnavailable = typ{"license-unavailable"}
)

View File

@@ -0,0 +1,38 @@
package middleware
import (
"fmt"
"log/slog"
"net/http"
"runtime"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/SigNoz/signoz/pkg/http/render"
)
type Recovery struct {
logger *slog.Logger
}
func NewRecovery(logger *slog.Logger) *Recovery {
return &Recovery{
logger: logger.With(slog.String("pkg", pkgname)),
}
}
func (middleware *Recovery) Wrap(next http.Handler) http.Handler {
return http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
defer func() {
if r := recover(); r != nil {
buf := make([]byte, 4096)
n := runtime.Stack(buf, false)
err := errors.New(errors.TypeFatal, errors.CodeFatal, fmt.Sprint(r)).WithStacktrace(string(buf[:n]))
middleware.logger.ErrorContext(req.Context(), "panic recovered", errors.Attr(err))
render.Error(rw, errors.Wrap(err, errors.TypeFatal, errors.CodeFatal, "An unexpected error occurred on our end. Please try again."))
}
}()
next.ServeHTTP(rw, req)
})
}

View File

@@ -0,0 +1,164 @@
package middleware
import (
"context"
"encoding/json"
"log/slog"
"net/http"
"net/http/httptest"
"testing"
"github.com/SigNoz/signoz/pkg/errors"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestRecovery(t *testing.T) {
t.Parallel()
testCases := []struct {
name string
handler http.HandlerFunc
wantStatus int
wantLog bool
wantMessage string
wantErrorStatus bool
}{
{
name: "PanicWithString",
handler: func(w http.ResponseWriter, r *http.Request) {
panic("something went wrong")
},
wantStatus: http.StatusInternalServerError,
wantLog: true,
wantMessage: "something went wrong",
wantErrorStatus: true,
},
{
name: "PanicWithError",
handler: func(w http.ResponseWriter, r *http.Request) {
panic(errors.New(errors.TypeInternal, errors.CodeInternal, "db connection failed"))
},
wantStatus: http.StatusInternalServerError,
wantLog: true,
wantMessage: "db connection failed",
wantErrorStatus: true,
},
{
name: "PanicWithInteger",
handler: func(w http.ResponseWriter, r *http.Request) {
panic(42)
},
wantStatus: http.StatusInternalServerError,
wantLog: true,
wantMessage: "42",
wantErrorStatus: true,
},
{
name: "PanicWithDivisionByZero",
handler: func(w http.ResponseWriter, r *http.Request) {
divisor := 0
_ = 1 / divisor
},
wantStatus: http.StatusInternalServerError,
wantLog: true,
wantMessage: "runtime error: integer divide by zero",
wantErrorStatus: true,
},
{
name: "NoPanic",
handler: func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
},
wantStatus: http.StatusOK,
wantLog: false,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
var records []slog.Record
logger := slog.New(newRecordCollector(&records))
m := NewRecovery(logger)
handler := m.Wrap(http.HandlerFunc(tc.handler))
rr := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/", nil)
handler.ServeHTTP(rr, req)
assert.Equal(t, tc.wantStatus, rr.Code)
if !tc.wantLog {
assert.Empty(t, records)
return
}
require.Len(t, records, 1)
err := extractException(t, records[0])
require.NotNil(t, err)
typ, _, message, _, _, _ := errors.Unwrapb(err)
assert.Equal(t, errors.TypeFatal, typ)
assert.Equal(t, tc.wantMessage, message)
type stacktracer interface {
Stacktrace() string
}
st, ok := err.(stacktracer)
require.True(t, ok, "error should implement stacktracer")
assert.NotEmpty(t, st.Stacktrace())
if tc.wantErrorStatus {
var body map[string]any
require.NoError(t, json.Unmarshal(rr.Body.Bytes(), &body))
assert.Equal(t, "error", body["status"])
}
})
}
}
// extractException finds the "exception" attr in a log record and returns the error.
func extractException(t *testing.T, record slog.Record) error {
t.Helper()
var found error
record.Attrs(func(a slog.Attr) bool {
if a.Key == "exception" {
if err, ok := a.Value.Any().(error); ok {
found = err
return false
}
}
return true
})
return found
}
// recordCollector is an slog.Handler that collects records for assertion.
type recordCollector struct {
records *[]slog.Record
attrs []slog.Attr
}
func newRecordCollector(records *[]slog.Record) *recordCollector {
return &recordCollector{records: records}
}
func (h *recordCollector) Enabled(_ context.Context, _ slog.Level) bool { return true }
func (h *recordCollector) Handle(_ context.Context, record slog.Record) error {
for _, a := range h.attrs {
record.AddAttrs(a)
}
*h.records = append(*h.records, record)
return nil
}
func (h *recordCollector) WithAttrs(attrs []slog.Attr) slog.Handler {
return &recordCollector{records: h.records, attrs: append(h.attrs, attrs...)}
}
func (h *recordCollector) WithGroup(_ string) slog.Handler { return h }

View File

@@ -64,6 +64,8 @@ func Error(rw http.ResponseWriter, cause error) {
httpCode = statusClientClosedConnection
case errors.TypeTimeout:
httpCode = http.StatusGatewayTimeout
case errors.TypeFatal:
httpCode = http.StatusInternalServerError
case errors.TypeLicenseUnavailable:
httpCode = http.StatusUnavailableForLegalReasons
}

View File

@@ -5,9 +5,7 @@ import (
"context"
"encoding/json"
"fmt"
"log/slog"
"net/http"
"runtime/debug"
"strconv"
"github.com/SigNoz/signoz/pkg/analytics"
@@ -52,26 +50,6 @@ func (handler *handler) QueryRange(rw http.ResponseWriter, req *http.Request) {
return
}
defer func() {
if r := recover(); r != nil {
stackTrace := string(debug.Stack())
queryJSON, _ := json.Marshal(queryRangeRequest)
handler.set.Logger.ErrorContext(ctx, "panic in QueryRange",
slog.Any("error", r),
slog.Any("user", claims.UserID),
slog.String("payload", string(queryJSON)),
slog.String("stacktrace", stackTrace),
)
render.Error(rw, errors.NewInternalf(
errors.CodeInternal,
"Something went wrong on our end. It's not you, it's us. Our team is notified about it. Reach out to support if issue persists.",
))
}
}()
// Validate the query request
if err := queryRangeRequest.Validate(); err != nil {
render.Error(rw, err)
@@ -152,26 +130,6 @@ func (handler *handler) QueryRawStream(rw http.ResponseWriter, req *http.Request
return
}
defer func() {
if r := recover(); r != nil {
stackTrace := string(debug.Stack())
queryJSON, _ := json.Marshal(queryRangeRequest)
handler.set.Logger.ErrorContext(ctx, "panic in QueryRawStream",
slog.Any("error", r),
slog.Any("user", claims.UserID),
slog.String("payload", string(queryJSON)),
slog.String("stacktrace", stackTrace),
)
render.Error(rw, errors.NewInternalf(
errors.CodeInternal,
"Something went wrong on our end. It's not you, it's us. Our team is notified about it. Reach out to support if issue persists.",
))
}
}()
// Validate the query request
if err := queryRangeRequest.Validate(); err != nil {
render.Error(rw, err)

View File

@@ -190,6 +190,7 @@ func (s Server) HealthCheckStatus() chan healthcheck.Status {
func (s *Server) createPublicServer(api *APIHandler, web web.Web) (*http.Server, error) {
r := NewRouter()
r.Use(middleware.NewRecovery(s.signoz.Instrumentation.Logger()).Wrap)
r.Use(otelmux.Middleware(
"apiserver",
otelmux.WithMeterProvider(s.signoz.Instrumentation.MeterProvider()),
@@ -198,7 +199,6 @@ func (s *Server) createPublicServer(api *APIHandler, web web.Web) (*http.Server,
otelmux.WithFilter(func(r *http.Request) bool {
return !slices.Contains([]string{"/api/v1/health"}, r.URL.Path)
}),
otelmux.WithPublicEndpoint(),
))
r.Use(middleware.NewIdentN(s.signoz.IdentNResolver, s.signoz.Sharder, s.signoz.Instrumentation.Logger()).Wrap)
r.Use(middleware.NewTimeout(s.signoz.Instrumentation.Logger(),