From cad961f75fded61e6ba7141ad9f7b9ffff60c8c8 Mon Sep 17 00:00:00 2001 From: cangming Date: Wed, 10 Dec 2025 10:15:49 +0800 Subject: [PATCH] plugin/errors: add show_first option to consolidate (#7702) (#7703) Add optional show_first flag to consolidate directive that logs the first error immediately and then consolidates subsequent errors. When show_first is enabled: - The first matching error is logged immediately with full details (rcode, domain, type, error message) using the configured log level - Subsequent matching errors are consolidated during the period - At period end: - If only one error occurred, no summary is printed (already logged) - If multiple errors occurred, summary shows the total count Syntax: consolidate DURATION REGEXP [LEVEL] [show_first] Example with 3 errors: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout [WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s Example with 1 error: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout Implementation details: - Add showFirst bool to pattern struct - Rename inc() to consolidateError(), return false for showFirst case - Use function pointer in ServeDNS to unify log calls with proper level - Simplify logPattern() with single condition (cnt > 1 || !showFirst) - Refactor parseLogLevel() to parseOptionalParams() with map-based dispatch - Validate parameter order: log level must come before show_first - Update README.md with show_first documentation and examples - Add comprehensive test cases for show_first functionality Signed-off-by: cangming --- plugin/errors/README.md | 30 +++++++++- plugin/errors/errors.go | 33 +++++++++-- plugin/errors/errors_test.go | 103 +++++++++++++++++++++++++++++++--- plugin/errors/setup.go | 54 ++++++++++++------ plugin/errors/setup_test.go | 104 +++++++++++++++++++++++++++++++++++ 5 files changed, 293 insertions(+), 31 deletions(-) diff --git a/plugin/errors/README.md b/plugin/errors/README.md index 27ba1058a..35210ca22 100644 --- a/plugin/errors/README.md +++ b/plugin/errors/README.md @@ -23,7 +23,7 @@ Extra knobs are available with an expanded syntax: ~~~ errors { stacktrace - consolidate DURATION REGEXP [LEVEL] + consolidate DURATION REGEXP [LEVEL] [show_first] } ~~~ @@ -35,6 +35,21 @@ log level, which is configurable by optional option **LEVEL**. Supported options 2 errors like '^read udp .* i/o timeout$' occurred in last 30s ~~~ +If the optional `show_first` flag is specified, the first error will be logged immediately when it occurs, and then subsequent matching errors will be consolidated. When the consolidation period ends: +- If only one error occurred, no summary is printed (since it was already logged) +- If multiple errors occurred, a summary is printed showing the total count + +Example with 3 errors: +~~~ +[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout +[WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s +~~~ + +Example with 1 error: +~~~ +[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout +~~~ + Multiple `consolidate` options with different **DURATION** and **REGEXP** are allowed. In case if some error message corresponds to several defined regular expressions the message will be associated with the first appropriate **REGEXP**. For better performance, it's recommended to use the `^` or `$` metacharacters in regular expression when filtering error messages by prefix or suffix, e.g. `^failed to .*`, or `.* timeout$`. @@ -63,3 +78,16 @@ and errors with prefix "Failed to " as errors. } } ~~~ + +Use the *forward* plugin and consolidate timeout errors with `show_first` option to see both +the summary and the first occurrence of the error: + +~~~ corefile +. { + forward . 8.8.8.8 + errors { + consolidate 5m ".* i/o timeout$" warning show_first + consolidate 30s "^Failed to .+" error show_first + } +} +~~~ diff --git a/plugin/errors/errors.go b/plugin/errors/errors.go index 213ec77d9..31127e41a 100644 --- a/plugin/errors/errors.go +++ b/plugin/errors/errors.go @@ -23,6 +23,7 @@ type pattern struct { period time.Duration pattern *regexp.Regexp logCallback func(format string, v ...any) + showFirst bool } func (p *pattern) timer() *time.Timer { @@ -46,17 +47,24 @@ func newErrorHandler() *errorHandler { func (h *errorHandler) logPattern(i int) { cnt := atomic.SwapUint32(&h.patterns[i].count, 0) - if cnt > 0 { + if cnt == 0 { + return + } + if cnt > 1 || !h.patterns[i].showFirst { h.patterns[i].logCallback("%d errors like '%s' occurred in last %s", cnt, h.patterns[i].pattern.String(), h.patterns[i].period) } } -func (h *errorHandler) inc(i int) bool { +// consolidateError records an error occurrence for pattern i. +// Returns false when cnt == 1 and showFirst is configured, so the error +// will be printed by the caller using the pattern's logCallback. +func (h *errorHandler) consolidateError(i int) bool { if atomic.LoadUint32(&h.stopFlag) > 0 { return false } - if atomic.AddUint32(&h.patterns[i].count, 1) == 1 { + cnt := atomic.AddUint32(&h.patterns[i].count, 1) + if cnt == 1 { ind := i t := time.AfterFunc(h.patterns[ind].period, func() { h.logPattern(ind) @@ -65,6 +73,9 @@ func (h *errorHandler) inc(i int) bool { if atomic.LoadUint32(&h.stopFlag) > 0 && t.Stop() { h.logPattern(ind) } + // If showFirst is enabled, return false so the first error + // will be printed by the caller using the pattern's logCallback + return !h.patterns[i].showFirst } return true } @@ -85,16 +96,26 @@ func (h *errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dn if err != nil { strErr := err.Error() + state := request.Request{W: w, Req: r} + + // Default to error logging + logFunc := log.Errorf + for i := range h.patterns { if h.patterns[i].pattern.MatchString(strErr) { - if h.inc(i) { + if h.consolidateError(i) { + // Error is consolidated, no need to log return rcode, err } + // consolidateError returned false (showFirst case) + // Use the pattern's configured log level + logFunc = h.patterns[i].logCallback break } } - state := request.Request{W: w, Req: r} - log.Errorf("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr) + + // Log with the appropriate log level + logFunc("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr) } return rcode, err diff --git a/plugin/errors/errors_test.go b/plugin/errors/errors_test.go index d06c854f3..2d5bd3002 100644 --- a/plugin/errors/errors_test.go +++ b/plugin/errors/errors_test.go @@ -134,13 +134,13 @@ func TestInc(t *testing.T) { }}, } - ret := h.inc(0) + ret := h.consolidateError(0) if ret { t.Error("Unexpected return value, expected false, actual true") } h.stopFlag = 0 - ret = h.inc(0) + ret = h.consolidateError(0) if !ret { t.Error("Unexpected return value, expected true, actual false") } @@ -156,7 +156,7 @@ func TestInc(t *testing.T) { t.Error("Unexpected 'timer', expected not nil") } - ret = h.inc(0) + ret = h.consolidateError(0) if !ret { t.Error("Unexpected return value, expected true, actual false") } @@ -194,13 +194,14 @@ func TestStop(t *testing.T) { }}, } - h.inc(0) - h.inc(0) - h.inc(0) + h.consolidateError(0) + h.consolidateError(0) + h.consolidateError(0) expCnt := uint32(3) actCnt := atomic.LoadUint32(&h.patterns[0].count) if actCnt != expCnt { - t.Fatalf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt) + t.Errorf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt) + return } h.stop() @@ -230,6 +231,94 @@ func TestStop(t *testing.T) { } } +func TestShowFirst(t *testing.T) { + tests := []struct { + name string + errorCount int + expectSummary string + shouldHaveSummary bool + }{ + { + name: "multiple errors", + errorCount: 3, + expectSummary: "3 errors like '^error.*!$' occurred in last 2s", + shouldHaveSummary: true, + }, + { + name: "single error", + errorCount: 1, + expectSummary: "", + shouldHaveSummary: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + buf := bytes.Buffer{} + clog.D.Set() + golog.SetOutput(&buf) + + h := &errorHandler{ + patterns: []*pattern{{ + count: 0, + period: 2 * time.Second, + pattern: regexp.MustCompile("^error.*!$"), + logCallback: log.Errorf, + showFirst: true, + }}, + } + + // Add errors and verify return values + for i := range tt.errorCount { + ret := h.consolidateError(0) + if i == 0 { + // First call should return false (showFirst enabled) + if ret { + t.Errorf("First consolidateError call: expected false, got true") + } + // Simulate ServeDNS logging with pattern's logCallback + h.patterns[0].logCallback("2 example.org. A: error %d!", i+1) + } else { + // Subsequent calls should return true (consolidated) + if !ret { + t.Errorf("consolidateError call %d: expected true, got false", i+1) + } + } + } + + // Check count + expCnt := uint32(tt.errorCount) + actCnt := atomic.LoadUint32(&h.patterns[0].count) + if actCnt != expCnt { + t.Errorf("Unexpected 'count', expected %d, actual %d", expCnt, actCnt) + return + } + + // Check that first error was logged + output1 := buf.String() + if !strings.Contains(output1, "2 example.org. A: error 1!") { + t.Errorf("Expected first error to be logged, but got: %q", output1) + } + + // Clear buffer and trigger log pattern + buf.Reset() + h.logPattern(0) + + // Verify summary in logPattern output + output2 := buf.String() + if tt.shouldHaveSummary { + if !strings.Contains(output2, tt.expectSummary) { + t.Errorf("Expected summary %q not found in logPattern output: %q", tt.expectSummary, output2) + } + } else { + if strings.Contains(output2, "errors like") { + t.Errorf("Did not expect summary for single error, but got: %q", output2) + } + } + }) + } +} + func genErrorHandler(rcode int, err error) plugin.Handler { return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) { return rcode, err diff --git a/plugin/errors/setup.go b/plugin/errors/setup.go index 78b12f3a6..f6e99aba0 100644 --- a/plugin/errors/setup.go +++ b/plugin/errors/setup.go @@ -71,7 +71,7 @@ func errorsParse(c *caddy.Controller) (*errorHandler, error) { func parseConsolidate(c *caddy.Controller) (*pattern, error) { args := c.RemainingArgs() - if len(args) < 2 || len(args) > 3 { + if len(args) < 2 || len(args) > 4 { return nil, c.ArgErr() } p, err := time.ParseDuration(args[0]) @@ -82,28 +82,48 @@ func parseConsolidate(c *caddy.Controller) (*pattern, error) { if err != nil { return nil, c.Err(err.Error()) } - lc, err := parseLogLevel(c, args) + + lc, showFirst, err := parseOptionalParams(c, args[2:]) if err != nil { return nil, err } - return &pattern{period: p, pattern: re, logCallback: lc}, nil + + return &pattern{period: p, pattern: re, logCallback: lc, showFirst: showFirst}, nil } -func parseLogLevel(c *caddy.Controller, args []string) (func(format string, v ...any), error) { - if len(args) != 3 { - return log.Errorf, nil +// parseOptionalParams parses optional parameters (log level and show_first flag). +// Order: log level (optional) must come before show_first (optional). +func parseOptionalParams(c *caddy.Controller, args []string) (func(format string, v ...any), bool, error) { + logLevels := map[string]func(format string, v ...any){ + "warning": log.Warningf, + "error": log.Errorf, + "info": log.Infof, + "debug": log.Debugf, } - switch args[2] { - case "warning": - return log.Warningf, nil - case "error": - return log.Errorf, nil - case "info": - return log.Infof, nil - case "debug": - return log.Debugf, nil - default: - return nil, c.Errf("unknown log level argument in consolidate: %s", args[2]) + var logCallback func(format string, v ...any) // nil means not set yet + showFirst := false + + for _, arg := range args { + if callback, isLogLevel := logLevels[arg]; isLogLevel { + if logCallback != nil { + return nil, false, c.Errf("multiple log levels specified in consolidate") + } + if showFirst { + return nil, false, c.Errf("log level must come before show_first in consolidate") + } + logCallback = callback + } else if arg == "show_first" { + showFirst = true + } else { + return nil, false, c.Errf("unknown option in consolidate: %s", arg) + } } + + // Use default log level if not specified + if logCallback == nil { + logCallback = log.Errorf + } + + return logCallback, showFirst, nil } diff --git a/plugin/errors/setup_test.go b/plugin/errors/setup_test.go index 5dbc9eca6..c09d6cbbc 100644 --- a/plugin/errors/setup_test.go +++ b/plugin/errors/setup_test.go @@ -65,6 +65,27 @@ func TestErrorsParse(t *testing.T) { consolidate 1m error1 consolidate 5s error2 }`, false, 2, false}, + {`errors { + consolidate 1m error show_first + }`, false, 1, false}, + {`errors { + consolidate 1m error warning show_first + }`, false, 1, false}, + {`errors { + consolidate 1m error invalid_option + }`, true, 0, false}, + {`errors { + consolidate 1m error warning error + }`, true, 0, false}, + {`errors { + consolidate 1m error info debug + }`, true, 0, false}, + {`errors { + consolidate 1m error show_first warning + }`, true, 0, false}, + {`errors { + consolidate 1m error show_first info + }`, true, 0, false}, } for i, test := range tests { c := caddy.NewTestController("dns", test.inputErrorsRules) @@ -146,3 +167,86 @@ func TestProperLogCallbackIsSet(t *testing.T) { }) } } + +func TestShowFirstOption(t *testing.T) { + tests := []struct { + name string + inputErrorsRules string + wantShowFirst bool + wantLogLevel string + }{ + { + name: "show_first without log level", + inputErrorsRules: `errors { + consolidate 1m .* show_first + }`, + wantShowFirst: true, + wantLogLevel: "[ERROR]", // default + }, + { + name: "show_first with warning log level", + inputErrorsRules: `errors { + consolidate 1m .* warning show_first + }`, + wantShowFirst: true, + wantLogLevel: "[WARNING]", + }, + { + name: "show_first with error log level", + inputErrorsRules: `errors { + consolidate 1m .* error show_first + }`, + wantShowFirst: true, + wantLogLevel: "[ERROR]", + }, + { + name: "no show_first", + inputErrorsRules: `errors { + consolidate 1m .* + }`, + wantShowFirst: false, + wantLogLevel: "[ERROR]", + }, + { + name: "no show_first with log level", + inputErrorsRules: `errors { + consolidate 1m .* info + }`, + wantShowFirst: false, + wantLogLevel: "[INFO]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + buf := bytes.Buffer{} + golog.SetOutput(&buf) + clog.D.Set() + + c := caddy.NewTestController("dns", tt.inputErrorsRules) + h, err := errorsParse(c) + + if err != nil { + t.Errorf("Failed to parse: %v", err) + return + } + + if len(h.patterns) != 1 { + t.Errorf("Expected 1 pattern, got %d", len(h.patterns)) + return + } + + if h.patterns[0].showFirst != tt.wantShowFirst { + t.Errorf("Expected showFirst=%v, got %v", tt.wantShowFirst, h.patterns[0].showFirst) + } + + // Test log level + l := h.patterns[0].logCallback + l("test log") + + if log := buf.String(); !strings.Contains(log, tt.wantLogLevel) { + t.Errorf("Expected log level %q, but got %q", tt.wantLogLevel, log) + } + }) + } +}