mirror of
https://github.com/SigNoz/signoz.git
synced 2026-06-02 15:10:34 +01:00
Compare commits
1 Commits
ns/flamegr
...
inline-log
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ffbb1ddb8 |
@@ -7,6 +7,7 @@ import (
|
||||
"log/slog"
|
||||
"net/url"
|
||||
"reflect"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/SigNoz/signoz/pkg/contextlinks"
|
||||
@@ -85,19 +86,35 @@ func (r *ThresholdRule) prepareQueryRange(ctx context.Context, ts time.Time) (*q
|
||||
return req, nil
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareParamsForLogs(ctx context.Context, ts time.Time, lbls ruletypes.Labels) url.Values {
|
||||
// Limits for the recent log lines attached to a firing log-based alert. Both are
// intentionally small so the notification payload stays bounded.

// logSamplesMaxCount is how many of the most-recent matching log records are sampled.
const logSamplesMaxCount = 5

// logSampleBodyMaxLen caps each sampled body, counted in runes, so that one large
// record cannot blow up the annotation/notification.
const logSampleBodyMaxLen = 512
|
||||
|
||||
// logsQueryParams extracts, for a log-based alert, the evaluation window and the
|
||||
// per-group where clause: the rule's filter combined with the breaching group's
|
||||
// label values (lbls). The same where clause backs both the related-logs link and
|
||||
// the sample-logs query, so they always refer to the same set of logs. ok is false
|
||||
// when the rule is not a single log builder query (e.g. a formula or non-logs
|
||||
// signal), in which case there is nothing to link to or sample.
|
||||
func (r *ThresholdRule) logsQueryParams(ctx context.Context, ts time.Time, lbls ruletypes.Labels) (start, end time.Time, whereClause string, ok bool) {
|
||||
selectedQuery := r.SelectedQuery(ctx)
|
||||
|
||||
qr, err := r.prepareQueryRange(ctx, ts)
|
||||
if err != nil {
|
||||
return nil
|
||||
return time.Time{}, time.Time{}, "", false
|
||||
}
|
||||
start := time.UnixMilli(int64(qr.Start))
|
||||
end := time.UnixMilli(int64(qr.End))
|
||||
start = time.UnixMilli(int64(qr.Start))
|
||||
end = time.UnixMilli(int64(qr.End))
|
||||
|
||||
// TODO(srikanthccv): handle formula queries
|
||||
if selectedQuery < "A" || selectedQuery > "Z" {
|
||||
return nil
|
||||
return time.Time{}, time.Time{}, "", false
|
||||
}
|
||||
|
||||
var q qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]
|
||||
@@ -112,7 +129,7 @@ func (r *ThresholdRule) prepareParamsForLogs(ctx context.Context, ts time.Time,
|
||||
}
|
||||
|
||||
if q.Signal != telemetrytypes.SignalLogs {
|
||||
return nil
|
||||
return time.Time{}, time.Time{}, "", false
|
||||
}
|
||||
|
||||
filterExpr := ""
|
||||
@@ -120,11 +137,153 @@ func (r *ThresholdRule) prepareParamsForLogs(ctx context.Context, ts time.Time,
|
||||
filterExpr = q.Filter.Expression
|
||||
}
|
||||
|
||||
whereClause := contextlinks.PrepareFilterExpression(lbls.Map(), filterExpr, q.GroupBy)
|
||||
whereClause = contextlinks.PrepareFilterExpression(lbls.Map(), filterExpr, q.GroupBy)
|
||||
return start, end, whereClause, true
|
||||
}
|
||||
|
||||
func (r *ThresholdRule) prepareParamsForLogs(ctx context.Context, ts time.Time, lbls ruletypes.Labels) url.Values {
|
||||
start, end, whereClause, ok := r.logsQueryParams(ctx, ts, lbls)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
return contextlinks.PrepareParamsForLogsV5(start, end, whereClause)
|
||||
}
|
||||
|
||||
// fetchLogSamples returns up to logSamplesMaxCount of the most-recent log records
|
||||
// matching the alert's filter for the breaching group (lbls), newest first. It
|
||||
// reuses the same where clause as the related-logs link, so the samples are exactly
|
||||
// the logs that link points to.
|
||||
//
|
||||
// Sampling is best-effort enrichment: any failure is logged and yields no samples
|
||||
// rather than failing the rule evaluation.
|
||||
func (r *ThresholdRule) fetchLogSamples(ctx context.Context, ts time.Time, lbls ruletypes.Labels) []*qbtypes.RawRow {
|
||||
start, end, whereClause, ok := r.logsQueryParams(ctx, ts, lbls)
|
||||
if !ok {
|
||||
return nil
|
||||
}
|
||||
|
||||
req := &qbtypes.QueryRangeRequest{
|
||||
Start: uint64(start.UnixMilli()),
|
||||
End: uint64(end.UnixMilli()),
|
||||
RequestType: qbtypes.RequestTypeRaw,
|
||||
CompositeQuery: qbtypes.CompositeQuery{
|
||||
Queries: []qbtypes.QueryEnvelope{
|
||||
{
|
||||
Type: qbtypes.QueryTypeBuilder,
|
||||
Spec: qbtypes.QueryBuilderQuery[qbtypes.LogAggregation]{
|
||||
Signal: telemetrytypes.SignalLogs,
|
||||
Name: "log_samples",
|
||||
Filter: &qbtypes.Filter{Expression: whereClause},
|
||||
Limit: logSamplesMaxCount,
|
||||
// timestamp,id DESC => most recent first. Both keys with an
|
||||
// identical direction are also what enables the window-list
|
||||
// fast path for raw log queries.
|
||||
Order: []qbtypes.OrderBy{
|
||||
{
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
Key: qbtypes.OrderByKey{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{Name: "timestamp", Materialized: true}},
|
||||
},
|
||||
{
|
||||
Direction: qbtypes.OrderDirectionDesc,
|
||||
Key: qbtypes.OrderByKey{TelemetryFieldKey: telemetrytypes.TelemetryFieldKey{Name: "id", Materialized: true}},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
NoCache: true,
|
||||
}
|
||||
|
||||
ctx = ctxtypes.NewContextWithCommentVals(ctx, map[string]string{
|
||||
instrumentationtypes.CodeNamespace: "rules",
|
||||
instrumentationtypes.CodeFunctionName: "fetchLogSamples",
|
||||
})
|
||||
|
||||
resp, err := r.querier.QueryRange(ctx, r.orgID, req)
|
||||
if err != nil {
|
||||
r.logger.WarnContext(ctx, "failed to fetch log samples for alert annotation", errors.Attr(err))
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, item := range resp.Data.Results {
|
||||
if raw, ok := item.(*qbtypes.RawData); ok {
|
||||
return raw.Rows
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatLogSamples renders sampled log records as a compact, monospaced markdown
|
||||
// block: one line per record as "[RFC3339] SEVERITY body". Records without a body
|
||||
// are skipped (mirroring Datadog), each body is collapsed to a single line and
|
||||
// truncated to logSampleBodyMaxLen. Returns "" when there is nothing to show.
|
||||
func formatLogSamples(rows []*qbtypes.RawRow) string {
|
||||
lines := make([]string, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
if row == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
body := strings.TrimSpace(rawRowStringField(row, "body"))
|
||||
if body == "" {
|
||||
continue
|
||||
}
|
||||
body = truncateRunes(logSampleSingleLine(body), logSampleBodyMaxLen)
|
||||
|
||||
var sb strings.Builder
|
||||
if !row.Timestamp.IsZero() {
|
||||
sb.WriteString("[")
|
||||
sb.WriteString(row.Timestamp.UTC().Format(time.RFC3339))
|
||||
sb.WriteString("] ")
|
||||
}
|
||||
if sev := strings.TrimSpace(rawRowStringField(row, "severity_text")); sev != "" {
|
||||
sb.WriteString(sev)
|
||||
sb.WriteString(" ")
|
||||
}
|
||||
sb.WriteString(body)
|
||||
lines = append(lines, sb.String())
|
||||
}
|
||||
|
||||
if len(lines) == 0 {
|
||||
return ""
|
||||
}
|
||||
return "```\n" + strings.Join(lines, "\n") + "\n```"
|
||||
}
|
||||
|
||||
// rawRowStringField returns the named field from a raw row as a string, or "" if it
|
||||
// is absent or not a string.
|
||||
func rawRowStringField(row *qbtypes.RawRow, key string) string {
|
||||
if row == nil || row.Data == nil {
|
||||
return ""
|
||||
}
|
||||
if v, ok := row.Data[key]; ok {
|
||||
if s, ok := v.(string); ok {
|
||||
return s
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// logSampleNewlineReplacer maps each line break ("\r\n", "\n" or "\r") to a single
// space. "\r\n" is listed first so a CRLF pair becomes one space, not two. It is
// built once at package scope rather than on every call.
var logSampleNewlineReplacer = strings.NewReplacer("\r\n", " ", "\n", " ", "\r", " ")

// logSampleSingleLine collapses newlines so a multi-line log body renders as one
// line within the samples block. Each line break is replaced by exactly one space;
// all other characters are left untouched.
func logSampleSingleLine(s string) string {
	return logSampleNewlineReplacer.Replace(s)
}
|
||||
|
||||
// truncateRunes shortens s to at most max runes, appending an ellipsis ("…") when
// anything was trimmed. A non-positive max disables truncation and returns s
// unchanged.
//
// Only the retained prefix is converted to runes, so the cost is bounded by max
// rather than by the length of s.
func truncateRunes(s string, max int) string {
	// A string never has more runes than bytes, so this also covers the common
	// "already short enough" case without decoding anything.
	if max <= 0 || len(s) <= max {
		return s
	}

	seen := 0
	for i := range s {
		if seen == max {
			// i is the byte offset of the first rune past the limit. Round-tripping
			// the prefix through []rune keeps invalid bytes rendered as U+FFFD.
			return string([]rune(s[:i])) + "…"
		}
		seen++
	}
	return s
}
|
||||
|
||||
func (r *ThresholdRule) prepareParamsForTraces(ctx context.Context, ts time.Time, lbls ruletypes.Labels) url.Values {
|
||||
selectedQuery := r.SelectedQuery(ctx)
|
||||
|
||||
@@ -352,6 +511,14 @@ func (r *ThresholdRule) Eval(ctx context.Context, ts time.Time) (int, error) {
|
||||
r.logger.InfoContext(ctx, "adding logs link to annotations", slog.String("annotation.link", link))
|
||||
annotations = append(annotations, ruletypes.Label{Name: ruletypes.AnnotationRelatedLogs, Value: link})
|
||||
}
|
||||
// Attach a few recent matching log lines so responders see what fired
|
||||
// the alert without leaving the notification. Skipped for no-data
|
||||
// alerts, which by definition have no matching logs.
|
||||
if !smpl.IsMissing {
|
||||
if samples := formatLogSamples(r.fetchLogSamples(ctx, ts, smpl.Metric)); samples != "" {
|
||||
annotations = append(annotations, ruletypes.Label{Name: ruletypes.AnnotationRelatedLogsSamples, Value: samples})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lbs := lb.Labels()
|
||||
|
||||
72
pkg/query-service/rules/threshold_rule_log_samples_test.go
Normal file
72
pkg/query-service/rules/threshold_rule_log_samples_test.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package rules
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
qbtypes "github.com/SigNoz/signoz/pkg/types/querybuildertypes/querybuildertypesv5"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestFormatLogSamples(t *testing.T) {
|
||||
ts := time.Date(2026, time.June, 1, 12, 0, 3, 0, time.UTC)
|
||||
|
||||
t.Run("nil and empty yield empty string", func(t *testing.T) {
|
||||
assert.Equal(t, "", formatLogSamples(nil))
|
||||
assert.Equal(t, "", formatLogSamples([]*qbtypes.RawRow{}))
|
||||
})
|
||||
|
||||
t.Run("skips nil rows and rows without a usable body", func(t *testing.T) {
|
||||
rows := []*qbtypes.RawRow{
|
||||
nil,
|
||||
{Timestamp: ts, Data: map[string]any{"body": ""}},
|
||||
{Timestamp: ts, Data: map[string]any{"body": " "}},
|
||||
{Timestamp: ts, Data: map[string]any{"severity_text": "ERROR"}}, // no body key
|
||||
{Timestamp: ts, Data: map[string]any{"body": 42}}, // body not a string
|
||||
}
|
||||
assert.Equal(t, "", formatLogSamples(rows))
|
||||
})
|
||||
|
||||
t.Run("renders timestamp, severity and body inside a code block", func(t *testing.T) {
|
||||
rows := []*qbtypes.RawRow{
|
||||
{Timestamp: ts, Data: map[string]any{"severity_text": "ERROR", "body": "payment failed"}},
|
||||
}
|
||||
want := "```\n[2026-06-01T12:00:03Z] ERROR payment failed\n```"
|
||||
assert.Equal(t, want, formatLogSamples(rows))
|
||||
})
|
||||
|
||||
t.Run("omits severity when absent and collapses a multi-line body", func(t *testing.T) {
|
||||
rows := []*qbtypes.RawRow{
|
||||
{Timestamp: ts, Data: map[string]any{"body": "line1\nline2\r\nline3"}},
|
||||
}
|
||||
want := "```\n[2026-06-01T12:00:03Z] line1 line2 line3\n```"
|
||||
assert.Equal(t, want, formatLogSamples(rows))
|
||||
})
|
||||
|
||||
t.Run("omits the timestamp prefix when zero", func(t *testing.T) {
|
||||
rows := []*qbtypes.RawRow{
|
||||
{Data: map[string]any{"body": "no ts"}},
|
||||
}
|
||||
assert.Equal(t, "```\nno ts\n```", formatLogSamples(rows))
|
||||
})
|
||||
|
||||
t.Run("renders one line per record and preserves input order", func(t *testing.T) {
|
||||
rows := []*qbtypes.RawRow{
|
||||
{Timestamp: ts, Data: map[string]any{"body": "first"}},
|
||||
{Timestamp: ts.Add(-time.Second), Data: map[string]any{"body": "second"}},
|
||||
}
|
||||
want := "```\n[2026-06-01T12:00:03Z] first\n[2026-06-01T12:00:02Z] second\n```"
|
||||
assert.Equal(t, want, formatLogSamples(rows))
|
||||
})
|
||||
|
||||
t.Run("truncates a long body to logSampleBodyMaxLen runes plus ellipsis", func(t *testing.T) {
|
||||
long := strings.Repeat("a", logSampleBodyMaxLen+50)
|
||||
rows := []*qbtypes.RawRow{
|
||||
{Timestamp: ts, Data: map[string]any{"body": long}},
|
||||
}
|
||||
out := formatLogSamples(rows)
|
||||
assert.Contains(t, out, strings.Repeat("a", logSampleBodyMaxLen)+"…")
|
||||
assert.NotContains(t, out, strings.Repeat("a", logSampleBodyMaxLen+1))
|
||||
})
|
||||
}
|
||||
@@ -30,12 +30,13 @@ const (
|
||||
// {{ .Annotations.value }}, {{ .Annotations.threshold.value }}, etc. in
|
||||
// their channel templates.
|
||||
const (
	AnnotationTitleTemplate      = "_title_template"
	AnnotationBodyTemplate       = "_body_template"
	AnnotationRelatedLogs        = "related_logs"         // link to the logs behind a firing alert
	AnnotationRelatedLogsSamples = "related_logs_samples" // formatted recent log lines for a firing log alert
	AnnotationRelatedTraces      = "related_traces"
	AnnotationValue              = "value"
	AnnotationThresholdValue     = "threshold.value"
	AnnotationCompareOp          = "compare_op"
	AnnotationMatchType          = "match_type"
)
|
||||
|
||||
Reference in New Issue
Block a user