plugin/errors: add show_first option to consolidate (#7702) (#7703)

Add optional show_first flag to consolidate directive that logs the first error immediately and then consolidates subsequent errors. When show_first is enabled: - The first matching error is logged immediately with full details (rcode, domain, type, error message) using the configured log level - Subsequent matching errors are consolidated during the period - At period end: - If only one error occurred, no summary is printed (already logged) - If multiple errors occurred, summary shows the total count Syntax: consolidate DURATION REGEXP [LEVEL] [show_first] Example with 3 errors: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout [WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s Example with 1 error: [WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout Implementation details: - Add showFirst bool to pattern struct - Rename inc() to consolidateError(), return false for showFirst case - Use function pointer in ServeDNS to unify log calls with proper level - Simplify logPattern() with single condition (cnt > 1 || !showFirst) - Refactor parseLogLevel() to parseOptionalParams() with map-based dispatch - Validate parameter order: log level must come before show_first - Update README.md with show_first documentation and examples - Add comprehensive test cases for show_first functionality Signed-off-by: cangming <cangming@cangming.app>
2026-01-26 02:03:09 -05:00 · 2025-12-10 10:15:49 +08:00
parent 0233f3e7c6
commit cad961f75f
5 changed files with 293 additions and 31 deletions
--- a/plugin/errors/README.md
+++ b/plugin/errors/README.md
@@ -23,7 +23,7 @@ Extra knobs are available with an expanded syntax:
 ~~~
 errors {
 	stacktrace
-	consolidate DURATION REGEXP [LEVEL]
+	consolidate DURATION REGEXP [LEVEL] [show_first]
 }
 ~~~

@@ -35,6 +35,21 @@ log level, which is configurable by optional option **LEVEL**. Supported options
 2 errors like '^read udp .* i/o timeout$' occurred in last 30s
 ~~~

+If the optional `show_first` flag is specified, the first error will be logged immediately when it occurs, and then subsequent matching errors will be consolidated. When the consolidation period ends:
+- If only one error occurred, no summary is printed (since it was already logged)
+- If multiple errors occurred, a summary is printed showing the total count
+
+Example with 3 errors:
+~~~
+[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
+[WARNING] 3 errors like '^read udp .* i/o timeout$' occurred in last 30s
+~~~
+
+Example with 1 error:
+~~~
+[WARNING] 2 example.org. A: read udp 10.0.0.1:53->8.8.8.8:53: i/o timeout
+~~~
+
 Multiple `consolidate` options with different **DURATION** and **REGEXP** are allowed. In case if some error message corresponds to several defined regular expressions the message will be associated with the first appropriate **REGEXP**.

 For better performance, it's recommended to use the `^` or `$` metacharacters in regular expression when filtering error messages by prefix or suffix, e.g. `^failed to .*`, or `.* timeout$`.
@@ -63,3 +78,16 @@ and errors with prefix "Failed to " as errors.
    }
 }
 ~~~
+
+Use the *forward* plugin and consolidate timeout errors with `show_first` option to see both
+the summary and the first occurrence of the error:
+
+~~~ corefile
+. {
+    forward . 8.8.8.8
+    errors {
+        consolidate 5m ".* i/o timeout$" warning show_first
+        consolidate 30s "^Failed to .+" error show_first
+    }
+}
+~~~
--- a/plugin/errors/errors.go
+++ b/plugin/errors/errors.go
@@ -23,6 +23,7 @@ type pattern struct {
 	period      time.Duration
 	pattern     *regexp.Regexp
 	logCallback func(format string, v ...any)
+	showFirst   bool
 }

 func (p *pattern) timer() *time.Timer {
@@ -46,17 +47,24 @@ func newErrorHandler() *errorHandler {

 func (h *errorHandler) logPattern(i int) {
 	cnt := atomic.SwapUint32(&h.patterns[i].count, 0)
-	if cnt > 0 {
+	if cnt == 0 {
+		return
+	}
+	if cnt > 1 || !h.patterns[i].showFirst {
 		h.patterns[i].logCallback("%d errors like '%s' occurred in last %s",
 			cnt, h.patterns[i].pattern.String(), h.patterns[i].period)
 	}
 }

-func (h *errorHandler) inc(i int) bool {
+// consolidateError records an error occurrence for pattern i.
+// Returns false when cnt == 1 and showFirst is configured, so the error
+// will be printed by the caller using the pattern's logCallback.
+func (h *errorHandler) consolidateError(i int) bool {
 	if atomic.LoadUint32(&h.stopFlag) > 0 {
 		return false
 	}
-	if atomic.AddUint32(&h.patterns[i].count, 1) == 1 {
+	cnt := atomic.AddUint32(&h.patterns[i].count, 1)
+	if cnt == 1 {
 		ind := i
 		t := time.AfterFunc(h.patterns[ind].period, func() {
 			h.logPattern(ind)
@@ -65,6 +73,9 @@ func (h *errorHandler) inc(i int) bool {
 		if atomic.LoadUint32(&h.stopFlag) > 0 && t.Stop() {
 			h.logPattern(ind)
 		}
+		// If showFirst is enabled, return false so the first error
+		// will be printed by the caller using the pattern's logCallback
+		return !h.patterns[i].showFirst
 	}
 	return true
 }
@@ -85,16 +96,26 @@ func (h *errorHandler) ServeDNS(ctx context.Context, w dns.ResponseWriter, r *dn

 	if err != nil {
 		strErr := err.Error()
+		state := request.Request{W: w, Req: r}
+
+		// Default to error logging
+		logFunc := log.Errorf
+
 		for i := range h.patterns {
 			if h.patterns[i].pattern.MatchString(strErr) {
-				if h.inc(i) {
+				if h.consolidateError(i) {
+					// Error is consolidated, no need to log
 					return rcode, err
 				}
+				// consolidateError returned false (showFirst case)
+				// Use the pattern's configured log level
+				logFunc = h.patterns[i].logCallback
 				break
 			}
 		}
-		state := request.Request{W: w, Req: r}
-		log.Errorf("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
+
+		// Log with the appropriate log level
+		logFunc("%d %s %s: %s", rcode, state.Name(), state.Type(), strErr)
 	}

 	return rcode, err
--- a/plugin/errors/errors_test.go
+++ b/plugin/errors/errors_test.go
@@ -134,13 +134,13 @@ func TestInc(t *testing.T) {
 		}},
 	}

-	ret := h.inc(0)
+	ret := h.consolidateError(0)
 	if ret {
 		t.Error("Unexpected return value, expected false, actual true")
 	}

 	h.stopFlag = 0
-	ret = h.inc(0)
+	ret = h.consolidateError(0)
 	if !ret {
 		t.Error("Unexpected return value, expected true, actual false")
 	}
@@ -156,7 +156,7 @@ func TestInc(t *testing.T) {
 		t.Error("Unexpected 'timer', expected not nil")
 	}

-	ret = h.inc(0)
+	ret = h.consolidateError(0)
 	if !ret {
 		t.Error("Unexpected return value, expected true, actual false")
 	}
@@ -194,13 +194,14 @@ func TestStop(t *testing.T) {
 		}},
 	}

-	h.inc(0)
-	h.inc(0)
-	h.inc(0)
+	h.consolidateError(0)
+	h.consolidateError(0)
+	h.consolidateError(0)
 	expCnt := uint32(3)
 	actCnt := atomic.LoadUint32(&h.patterns[0].count)
 	if actCnt != expCnt {
-		t.Fatalf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
+		t.Errorf("Unexpected initial 'count', expected %d, actual %d", expCnt, actCnt)
+		return
 	}

 	h.stop()
@@ -230,6 +231,94 @@ func TestStop(t *testing.T) {
 	}
 }

+func TestShowFirst(t *testing.T) {
+	tests := []struct {
+		name              string
+		errorCount        int
+		expectSummary     string
+		shouldHaveSummary bool
+	}{
+		{
+			name:              "multiple errors",
+			errorCount:        3,
+			expectSummary:     "3 errors like '^error.*!$' occurred in last 2s",
+			shouldHaveSummary: true,
+		},
+		{
+			name:              "single error",
+			errorCount:        1,
+			expectSummary:     "",
+			shouldHaveSummary: false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			buf := bytes.Buffer{}
+			clog.D.Set()
+			golog.SetOutput(&buf)
+
+			h := &errorHandler{
+				patterns: []*pattern{{
+					count:       0,
+					period:      2 * time.Second,
+					pattern:     regexp.MustCompile("^error.*!$"),
+					logCallback: log.Errorf,
+					showFirst:   true,
+				}},
+			}
+
+			// Add errors and verify return values
+			for i := range tt.errorCount {
+				ret := h.consolidateError(0)
+				if i == 0 {
+					// First call should return false (showFirst enabled)
+					if ret {
+						t.Errorf("First consolidateError call: expected false, got true")
+					}
+					// Simulate ServeDNS logging with pattern's logCallback
+					h.patterns[0].logCallback("2 example.org. A: error %d!", i+1)
+				} else {
+					// Subsequent calls should return true (consolidated)
+					if !ret {
+						t.Errorf("consolidateError call %d: expected true, got false", i+1)
+					}
+				}
+			}
+
+			// Check count
+			expCnt := uint32(tt.errorCount)
+			actCnt := atomic.LoadUint32(&h.patterns[0].count)
+			if actCnt != expCnt {
+				t.Errorf("Unexpected 'count', expected %d, actual %d", expCnt, actCnt)
+				return
+			}
+
+			// Check that first error was logged
+			output1 := buf.String()
+			if !strings.Contains(output1, "2 example.org. A: error 1!") {
+				t.Errorf("Expected first error to be logged, but got: %q", output1)
+			}
+
+			// Clear buffer and trigger log pattern
+			buf.Reset()
+			h.logPattern(0)
+
+			// Verify summary in logPattern output
+			output2 := buf.String()
+			if tt.shouldHaveSummary {
+				if !strings.Contains(output2, tt.expectSummary) {
+					t.Errorf("Expected summary %q not found in logPattern output: %q", tt.expectSummary, output2)
+				}
+			} else {
+				if strings.Contains(output2, "errors like") {
+					t.Errorf("Did not expect summary for single error, but got: %q", output2)
+				}
+			}
+		})
+	}
+}
+
 func genErrorHandler(rcode int, err error) plugin.Handler {
 	return plugin.HandlerFunc(func(ctx context.Context, w dns.ResponseWriter, r *dns.Msg) (int, error) {
 		return rcode, err
--- a/plugin/errors/setup.go
+++ b/plugin/errors/setup.go
@@ -71,7 +71,7 @@ func errorsParse(c *caddy.Controller) (*errorHandler, error) {

 func parseConsolidate(c *caddy.Controller) (*pattern, error) {
 	args := c.RemainingArgs()
-	if len(args) < 2 || len(args) > 3 {
+	if len(args) < 2 || len(args) > 4 {
 		return nil, c.ArgErr()
 	}
 	p, err := time.ParseDuration(args[0])
@@ -82,28 +82,48 @@ func parseConsolidate(c *caddy.Controller) (*pattern, error) {
 	if err != nil {
 		return nil, c.Err(err.Error())
 	}
-	lc, err := parseLogLevel(c, args)
+
+	lc, showFirst, err := parseOptionalParams(c, args[2:])
 	if err != nil {
 		return nil, err
 	}
-	return &pattern{period: p, pattern: re, logCallback: lc}, nil
+
+	return &pattern{period: p, pattern: re, logCallback: lc, showFirst: showFirst}, nil
 }

-func parseLogLevel(c *caddy.Controller, args []string) (func(format string, v ...any), error) {
-	if len(args) != 3 {
-		return log.Errorf, nil
+// parseOptionalParams parses optional parameters (log level and show_first flag).
+// Order: log level (optional) must come before show_first (optional).
+func parseOptionalParams(c *caddy.Controller, args []string) (func(format string, v ...any), bool, error) {
+	logLevels := map[string]func(format string, v ...any){
+		"warning": log.Warningf,
+		"error":   log.Errorf,
+		"info":    log.Infof,
+		"debug":   log.Debugf,
 	}

-	switch args[2] {
-	case "warning":
-		return log.Warningf, nil
-	case "error":
-		return log.Errorf, nil
-	case "info":
-		return log.Infof, nil
-	case "debug":
-		return log.Debugf, nil
-	default:
-		return nil, c.Errf("unknown log level argument in consolidate: %s", args[2])
+	var logCallback func(format string, v ...any) // nil means not set yet
+	showFirst := false
+
+	for _, arg := range args {
+		if callback, isLogLevel := logLevels[arg]; isLogLevel {
+			if logCallback != nil {
+				return nil, false, c.Errf("multiple log levels specified in consolidate")
+			}
+			if showFirst {
+				return nil, false, c.Errf("log level must come before show_first in consolidate")
+			}
+			logCallback = callback
+		} else if arg == "show_first" {
+			showFirst = true
+		} else {
+			return nil, false, c.Errf("unknown option in consolidate: %s", arg)
+		}
 	}
+
+	// Use default log level if not specified
+	if logCallback == nil {
+		logCallback = log.Errorf
+	}
+
+	return logCallback, showFirst, nil
 }
--- a/plugin/errors/setup_test.go
+++ b/plugin/errors/setup_test.go
@@ -65,6 +65,27 @@ func TestErrorsParse(t *testing.T) {
 		    consolidate 1m error1
 		    consolidate 5s error2
 		  }`, false, 2, false},
+		{`errors {
+		    consolidate 1m error show_first
+		  }`, false, 1, false},
+		{`errors {
+		    consolidate 1m error warning show_first
+		  }`, false, 1, false},
+		{`errors {
+		    consolidate 1m error invalid_option
+		  }`, true, 0, false},
+		{`errors {
+		    consolidate 1m error warning error
+		  }`, true, 0, false},
+		{`errors {
+		    consolidate 1m error info debug
+		  }`, true, 0, false},
+		{`errors {
+		    consolidate 1m error show_first warning
+		  }`, true, 0, false},
+		{`errors {
+		    consolidate 1m error show_first info
+		  }`, true, 0, false},
 	}
 	for i, test := range tests {
 		c := caddy.NewTestController("dns", test.inputErrorsRules)
@@ -146,3 +167,86 @@ func TestProperLogCallbackIsSet(t *testing.T) {
 		})
 	}
 }
+
+func TestShowFirstOption(t *testing.T) {
+	tests := []struct {
+		name             string
+		inputErrorsRules string
+		wantShowFirst    bool
+		wantLogLevel     string
+	}{
+		{
+			name: "show_first without log level",
+			inputErrorsRules: `errors {
+		        consolidate 1m .* show_first
+		    }`,
+			wantShowFirst: true,
+			wantLogLevel:  "[ERROR]", // default
+		},
+		{
+			name: "show_first with warning log level",
+			inputErrorsRules: `errors {
+		        consolidate 1m .* warning show_first
+		    }`,
+			wantShowFirst: true,
+			wantLogLevel:  "[WARNING]",
+		},
+		{
+			name: "show_first with error log level",
+			inputErrorsRules: `errors {
+		        consolidate 1m .* error show_first
+		    }`,
+			wantShowFirst: true,
+			wantLogLevel:  "[ERROR]",
+		},
+		{
+			name: "no show_first",
+			inputErrorsRules: `errors {
+		        consolidate 1m .*
+		    }`,
+			wantShowFirst: false,
+			wantLogLevel:  "[ERROR]",
+		},
+		{
+			name: "no show_first with log level",
+			inputErrorsRules: `errors {
+		        consolidate 1m .* info
+		    }`,
+			wantShowFirst: false,
+			wantLogLevel:  "[INFO]",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			buf := bytes.Buffer{}
+			golog.SetOutput(&buf)
+			clog.D.Set()
+
+			c := caddy.NewTestController("dns", tt.inputErrorsRules)
+			h, err := errorsParse(c)
+
+			if err != nil {
+				t.Errorf("Failed to parse: %v", err)
+				return
+			}
+
+			if len(h.patterns) != 1 {
+				t.Errorf("Expected 1 pattern, got %d", len(h.patterns))
+				return
+			}
+
+			if h.patterns[0].showFirst != tt.wantShowFirst {
+				t.Errorf("Expected showFirst=%v, got %v", tt.wantShowFirst, h.patterns[0].showFirst)
+			}
+
+			// Test log level
+			l := h.patterns[0].logCallback
+			l("test log")
+
+			if log := buf.String(); !strings.Contains(log, tt.wantLogLevel) {
+				t.Errorf("Expected log level %q, but got %q", tt.wantLogLevel, log)
+			}
+		})
+	}
+}